1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 #include <petsc/private/vecimpl.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15 #include <cmumps_c.h> 16 #include <zmumps_c.h> 17 #include <smumps_c.h> 18 #include <dmumps_c.h> 19 #else 20 #if defined(PETSC_USE_COMPLEX) 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <cmumps_c.h> 23 #define MUMPS_c cmumps_c 24 #define MumpsScalar CMUMPS_COMPLEX 25 #else 26 #include <zmumps_c.h> 27 #define MUMPS_c zmumps_c 28 #define MumpsScalar ZMUMPS_COMPLEX 29 #endif 30 #else 31 #if defined(PETSC_USE_REAL_SINGLE) 32 #include <smumps_c.h> 33 #define MUMPS_c smumps_c 34 #define MumpsScalar SMUMPS_REAL 35 #else 36 #include <dmumps_c.h> 37 #define MUMPS_c dmumps_c 38 #define MumpsScalar DMUMPS_REAL 39 #endif 40 #endif 41 #endif 42 #if defined(PETSC_USE_COMPLEX) 43 #if defined(PETSC_USE_REAL_SINGLE) 44 #define MUMPS_STRUC_C CMUMPS_STRUC_C 45 #else 46 #define MUMPS_STRUC_C ZMUMPS_STRUC_C 47 #endif 48 #else 49 #if defined(PETSC_USE_REAL_SINGLE) 50 #define MUMPS_STRUC_C SMUMPS_STRUC_C 51 #else 52 #define MUMPS_STRUC_C DMUMPS_STRUC_C 53 #endif 54 #endif 55 EXTERN_C_END 56 57 #define JOB_INIT -1 58 #define JOB_NULL 0 59 #define JOB_FACTSYMBOLIC 1 60 #define JOB_FACTNUMERIC 2 61 #define JOB_SOLVE 3 62 #define JOB_END -2 63 64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 65 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 66 naming convention in PetscMPIInt, PetscBLASInt etc. 67 */ 68 typedef MUMPS_INT PetscMUMPSInt; 69 70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 71 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 72 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 73 #endif 74 #else 75 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 76 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 77 #endif 78 #endif 79 80 #define MPIU_MUMPSINT MPI_INT 81 #define PETSC_MUMPS_INT_MAX 2147483647 82 #define PETSC_MUMPS_INT_MIN -2147483648 83 84 /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/ 85 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 86 { 87 PetscFunctionBegin; 88 #if PetscDefined(USE_64BIT_INDICES) 89 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 90 #endif 91 *b = (PetscMUMPSInt)a; 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 /* Put these utility routines here since they are only used in this file */ 96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 97 { 98 PetscInt myval; 99 PetscBool myset; 100 101 PetscFunctionBegin; 102 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 103 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 104 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 105 if (set) *set = myset; 106 PetscFunctionReturn(PETSC_SUCCESS); 107 } 108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 109 110 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C. 111 // 112 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type. 113 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size 114 // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall(). 115 // 116 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface. 117 // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used. 118 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers. 119 typedef struct { 120 PetscPrecision precision; // precision used by MUMPS 121 void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex 122 123 // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions. 
124 MUMPS_INT sym, par, job; 125 MUMPS_INT comm_fortran; /* Fortran communicator */ 126 MUMPS_INT *icntl; 127 void *cntl; // MumpsReal, fixed size array 128 MUMPS_INT n; 129 MUMPS_INT nblk; 130 131 /* Assembled entry */ 132 MUMPS_INT8 nnz; 133 MUMPS_INT *irn; 134 MUMPS_INT *jcn; 135 void *a; // MumpsScalar, centralized input 136 PetscCount a_len; 137 138 /* Distributed entry */ 139 MUMPS_INT8 nnz_loc; 140 MUMPS_INT *irn_loc; 141 MUMPS_INT *jcn_loc; 142 void *a_loc; // MumpsScalar, distributed input 143 PetscCount a_loc_len; 144 145 /* Matrix by blocks */ 146 MUMPS_INT *blkptr; 147 MUMPS_INT *blkvar; 148 149 /* Ordering, if given by user */ 150 MUMPS_INT *perm_in; 151 152 /* RHS, solution, ouptput data and statistics */ 153 void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers 154 PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar 155 156 MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc; 157 MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc; 158 // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0) 159 MUMPS_INT schur_lld; 160 MUMPS_INT *info, *infog; // fixed size array 161 void *rinfo, *rinfog; // MumpsReal, fixed size array 162 163 /* Null space */ 164 MUMPS_INT *pivnul_list; // allocated by MUMPS! 165 MUMPS_INT *mapping; // allocated by MUMPS! 166 167 /* Schur */ 168 MUMPS_INT size_schur; 169 MUMPS_INT *listvar_schur; 170 void *schur; // MumpsScalar 171 PetscCount schur_len; 172 173 /* For out-of-core */ 174 char *ooc_tmpdir; // fixed size array 175 char *ooc_prefix; // fixed size array 176 } XMUMPS_STRUC_C; 177 178 // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct 179 #define AllocateInternalID(MUMPS_STRUC_T, outer) \ 180 do { \ 181 MUMPS_STRUC_T *inner; \ 182 PetscCall(PetscNew(&inner)); \ 183 outer->icntl = inner->icntl; \ 184 outer->cntl = inner->cntl; \ 185 outer->info = inner->info; \ 186 outer->infog = inner->infog; \ 187 outer->rinfo = inner->rinfo; \ 188 outer->rinfog = inner->rinfog; \ 189 outer->ooc_tmpdir = inner->ooc_tmpdir; \ 190 outer->ooc_prefix = inner->ooc_prefix; \ 191 /* the three field should never change after init */ \ 192 inner->comm_fortran = outer->comm_fortran; \ 193 inner->par = outer->par; \ 194 inner->sym = outer->sym; \ 195 outer->internal_id = inner; \ 196 } while (0) 197 198 // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner 199 static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision) 200 { 201 PetscFunctionBegin; 202 outer->precision = precision; 203 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 204 #if defined(PETSC_USE_COMPLEX) 205 if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer); 206 else AllocateInternalID(ZMUMPS_STRUC_C, outer); 207 #else 208 if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer); 209 else AllocateInternalID(DMUMPS_STRUC_C, outer); 210 #endif 211 #else 212 AllocateInternalID(MUMPS_STRUC_C, outer); 213 #endif 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \ 218 do { \ 219 MUMPS_STRUC_T *inner = (MUMPS_STRUC_T 
*)(outer)->internal_id; \ 220 PetscCall(PetscFree(inner->a)); \ 221 PetscCall(PetscFree(inner->a_loc)); \ 222 PetscCall(PetscFree(inner->redrhs)); \ 223 PetscCall(PetscFree(inner->rhs)); \ 224 PetscCall(PetscFree(inner->rhs_sparse)); \ 225 PetscCall(PetscFree(inner->rhs_loc)); \ 226 PetscCall(PetscFree(inner->sol_loc)); \ 227 PetscCall(PetscFree(inner->schur)); \ 228 } while (0) 229 230 static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer) 231 { 232 PetscFunctionBegin; 233 if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer 234 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 235 const PetscPrecision mumps_precision = outer->precision; 236 if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision 237 #if defined(PETSC_USE_COMPLEX) 238 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer); 239 else FreeInternalIDFields(ZMUMPS_STRUC_C, outer); 240 #else 241 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer); 242 else FreeInternalIDFields(DMUMPS_STRUC_C, outer); 243 #endif 244 } 245 #endif 246 PetscCall(PetscFree(outer->internal_id)); 247 } 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>. 252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array. 253 // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>) 254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request. 255 // 256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same. 257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 259 // 260 // 261 // Input parameters: 262 // + convert - whether to do PetscScalar to MumpsScalar conversion 263 // . n - length of the PetscScalar array 264 // . pa - [n]], points to the PetscScalar array 265 // . precision - precision of MumpsScalar 266 // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 267 // - ma - on input, an existing MumpsScalar array if any. 268 // 269 // Output parameters: 270 // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 272 // 273 // Note: 274 // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller. 275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 276 { 277 PetscFunctionBegin; 278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 279 const PetscPrecision mumps_precision = precision; 280 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). 
Must be single or double", (int)precision); 281 #if defined(PETSC_USE_COMPLEX) 282 if (mumps_precision != PETSC_SCALAR_PRECISION) { 283 if (mumps_precision == PETSC_PRECISION_SINGLE) { 284 if (*m < n) { 285 PetscCall(PetscFree(*ma)); 286 PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 287 *m = n; 288 } 289 if (convert) { 290 CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 291 for (PetscCount i = 0; i < n; i++) { 292 b[i].r = PetscRealPart(pa[i]); 293 b[i].i = PetscImaginaryPart(pa[i]); 294 } 295 } 296 } else { 297 if (*m < n) { 298 PetscCall(PetscFree(*ma)); 299 PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 300 *m = n; 301 } 302 if (convert) { 303 ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 304 for (PetscCount i = 0; i < n; i++) { 305 b[i].r = PetscRealPart(pa[i]); 306 b[i].i = PetscImaginaryPart(pa[i]); 307 } 308 } 309 } 310 } 311 #else 312 if (mumps_precision != PETSC_SCALAR_PRECISION) { 313 if (mumps_precision == PETSC_PRECISION_SINGLE) { 314 if (*m < n) { 315 PetscCall(PetscFree(*ma)); 316 PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 317 *m = n; 318 } 319 if (convert) { 320 SMUMPS_REAL *b = *(SMUMPS_REAL **)ma; 321 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 322 } 323 } else { 324 if (*m < n) { 325 PetscCall(PetscFree(*ma)); 326 PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 327 *m = n; 328 } 329 if (convert) { 330 DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 331 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 332 } 333 } 334 } 335 #endif 336 else 337 #endif 338 { 339 if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 340 *ma = (void *)pa; // same precision, make them alias 341 *m = 0; 342 } 343 PetscFunctionReturn(PETSC_SUCCESS); 344 } 345 346 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 347 // 348 // 1) If the two types are different, cast array elements. 349 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 350 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 351 { 352 PetscFunctionBegin; 353 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 354 if (mumps_precision != PETSC_SCALAR_PRECISION) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (mumps_precision == PETSC_PRECISION_SINGLE) { 357 PetscReal *a = (PetscReal *)pa; 358 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 360 } else { 361 PetscReal *a = (PetscReal *)pa; 362 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 363 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 364 } 365 #else 366 if (mumps_precision == PETSC_PRECISION_SINGLE) { 367 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 368 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 369 } else { 370 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 371 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 372 } 373 #endif 374 } else 375 #endif 376 PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 381 // 382 // 1) If the two types are different, cast array elements. 383 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
384 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma) 385 { 386 PetscFunctionBegin; 387 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 388 if (mumps_precision != PETSC_SCALAR_PRECISION) { 389 #if defined(PETSC_USE_COMPLEX) 390 if (mumps_precision == PETSC_PRECISION_SINGLE) { 391 CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma; 392 for (PetscCount i = 0; i < n; i++) { 393 b[i].r = PetscRealPart(pa[i]); 394 b[i].i = PetscImaginaryPart(pa[i]); 395 } 396 } else { 397 ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma; 398 for (PetscCount i = 0; i < n; i++) { 399 b[i].r = PetscRealPart(pa[i]); 400 b[i].i = PetscImaginaryPart(pa[i]); 401 } 402 } 403 #else 404 if (mumps_precision == PETSC_PRECISION_SINGLE) { 405 SMUMPS_REAL *b = (SMUMPS_REAL *)ma; 406 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 407 } else { 408 DMUMPS_REAL *b = (DMUMPS_REAL *)ma; 409 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 410 } 411 #endif 412 } else 413 #endif 414 PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n)); 415 PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id) 419 { 420 return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT; 421 } 422 423 #define PreMumpsCall(inner, outer, mumpsscalar) \ 424 do { \ 425 inner->job = outer->job; \ 426 inner->n = outer->n; \ 427 inner->nblk = outer->nblk; \ 428 inner->nnz = outer->nnz; \ 429 inner->irn = outer->irn; \ 430 inner->jcn = outer->jcn; \ 431 inner->a = (mumpsscalar *)outer->a; \ 432 inner->nnz_loc = outer->nnz_loc; \ 433 inner->irn_loc = outer->irn_loc; \ 434 inner->jcn_loc = outer->jcn_loc; \ 435 inner->a_loc = (mumpsscalar *)outer->a_loc; \ 436 inner->blkptr = outer->blkptr; \ 437 inner->blkvar = outer->blkvar; \ 438 inner->perm_in = outer->perm_in; \ 439 inner->rhs = (mumpsscalar *)outer->rhs; \ 440 inner->redrhs = (mumpsscalar *)outer->redrhs; \ 441 inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \ 442 inner->sol_loc = (mumpsscalar *)outer->sol_loc; \ 443 inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \ 444 inner->irhs_sparse = outer->irhs_sparse; \ 445 inner->irhs_ptr = outer->irhs_ptr; \ 446 inner->isol_loc = outer->isol_loc; \ 447 inner->irhs_loc = outer->irhs_loc; \ 448 inner->nrhs = outer->nrhs; \ 449 inner->lrhs = outer->lrhs; \ 450 inner->lredrhs = outer->lredrhs; \ 451 inner->nz_rhs = outer->nz_rhs; \ 452 inner->lsol_loc = outer->lsol_loc; \ 453 inner->nloc_rhs = outer->nloc_rhs; \ 454 inner->lrhs_loc = outer->lrhs_loc; \ 455 inner->schur_lld = outer->schur_lld; \ 456 inner->size_schur = outer->size_schur; \ 457 inner->listvar_schur = outer->listvar_schur; \ 458 inner->schur = (mumpsscalar *)outer->schur; \ 459 } while (0) 460 461 #define PostMumpsCall(inner, outer) \ 462 do { \ 463 outer->pivnul_list = inner->pivnul_list; \ 464 outer->mapping = inner->mapping; \ 465 } while (0) 466 467 // Entry for PETSc to call mumps 468 static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer) 469 { 470 PetscFunctionBegin; 471 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 472 #if defined(PETSC_USE_COMPLEX) 473 if (outer->precision == PETSC_PRECISION_SINGLE) { 474 CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id; 475 PreMumpsCall(inner, outer, CMUMPS_COMPLEX); 476 PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner)); 477 PostMumpsCall(inner, outer); 478 } else { 479 ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id; 480 PreMumpsCall(inner, outer, 
ZMUMPS_COMPLEX); 481 PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner)); 482 PostMumpsCall(inner, outer); 483 } 484 #else 485 if (outer->precision == PETSC_PRECISION_SINGLE) { 486 SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id; 487 PreMumpsCall(inner, outer, SMUMPS_REAL); 488 PetscStackCallExternalVoid("smumps_c", smumps_c(inner)); 489 PostMumpsCall(inner, outer); 490 } else { 491 DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id; 492 PreMumpsCall(inner, outer, DMUMPS_REAL); 493 PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner)); 494 PostMumpsCall(inner, outer); 495 } 496 #endif 497 #else 498 MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id; 499 PreMumpsCall(inner, outer, MumpsScalar); 500 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner)); 501 PostMumpsCall(inner, outer); 502 #endif 503 PetscFunctionReturn(PETSC_SUCCESS); 504 } 505 506 /* macros s.t. indices match MUMPS documentation */ 507 #define ICNTL(I) icntl[(I) - 1] 508 #define INFOG(I) infog[(I) - 1] 509 #define INFO(I) info[(I) - 1] 510 511 // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I! 512 #if defined(PETSC_USE_COMPLEX) 513 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i) 514 #else 515 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I]) 516 #endif 517 518 // Get a value from MumpsReal arrays. The value is convertible to PetscReal. 519 #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1]) 520 #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1]) 521 #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1]) 522 523 // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL> 524 #define ID_CNTL_SET(ID, I, VAL) \ 525 do { \ 526 if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \ 527 else ((double *)(ID).cntl)[(I) - 1] = (VAL); \ 528 } while (0) 529 530 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 531 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 532 #define PetscMUMPS_c(mumps) \ 533 do { \ 534 if (mumps->use_petsc_omp_support) { \ 535 if (mumps->is_omp_master) { \ 536 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 537 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 538 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 539 PetscCall(PetscFPTrapPop()); \ 540 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 541 } \ 542 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 543 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 544 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 545 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 546 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 547 */ \ 548 MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \ 549 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 550 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 551 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 552 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 553 } else { \ 554 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 555 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 556 PetscCall(PetscFPTrapPop()); \ 557 } \ 558 } while (0) 559 #else 560 #define PetscMUMPS_c(mumps) \ 561 do { \ 562 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 563 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 564 PetscCall(PetscFPTrapPop()); \ 565 } while (0) 566 #endif 567 568 typedef struct Mat_MUMPS Mat_MUMPS; 569 struct Mat_MUMPS { 570 XMUMPS_STRUC_C id; 571 572 MatStructure matstruc; 573 PetscMPIInt myid, petsc_size; 574 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 575 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 576 PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 577 PetscMUMPSInt sym; 578 MPI_Comm mumps_comm; 579 PetscMUMPSInt *ICNTL_pre; 580 PetscReal *CNTL_pre; 581 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 582 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 583 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 584 PetscMUMPSInt ICNTL26; 585 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 586 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 587 PetscInt *rhs_nrow, max_nrhs; 588 PetscMPIInt *rhs_recvcounts, *rhs_disps; 589 PetscScalar *rhs_loc, *rhs_recvbuf; 590 #endif 591 Vec b_seq, x_seq; 592 PetscInt ninfo, *info; /* which INFO to display */ 593 PetscInt sizeredrhs; 594 PetscScalar *schur_sol; 595 PetscInt schur_sizesol; 596 PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision 597 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 598 PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 599 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 600 601 /* Support for MATNEST */ 602 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 603 PetscCount *nest_vals_start; 604 PetscScalar *nest_vals; 605 606 /* stuff used by petsc/mumps OpenMP support*/ 607 PetscBool use_petsc_omp_support; 608 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 609 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */ 610 PetscCount *recvcount; /* a collection of nnz on omp_master */ 611 PetscMPIInt tag, omp_comm_size; 612 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 613 MPI_Request *reqs; 614 }; 615 616 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 617 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 
618 */ 619 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 620 { 621 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 622 623 PetscFunctionBegin; 624 #if defined(PETSC_USE_64BIT_INDICES) 625 { 626 PetscInt i; 627 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 628 PetscCall(PetscFree(mumps->ia_alloc)); 629 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 630 mumps->cur_ilen = nrow + 1; 631 } 632 if (nnz > mumps->cur_jlen) { 633 PetscCall(PetscFree(mumps->ja_alloc)); 634 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 635 mumps->cur_jlen = nnz; 636 } 637 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 638 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 639 *ia_mumps = mumps->ia_alloc; 640 *ja_mumps = mumps->ja_alloc; 641 } 642 #else 643 *ia_mumps = ia; 644 *ja_mumps = ja; 645 #endif 646 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 651 { 652 PetscFunctionBegin; 653 PetscCall(PetscFree(mumps->id.listvar_schur)); 654 PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID() 655 PetscCall(PetscFree(mumps->schur_sol)); 656 mumps->id.size_schur = 0; 657 mumps->id.schur_lld = 0; 658 if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 /* solve with rhs in mumps->id.redrhs and return in the same location */ 663 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 664 { 665 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 666 Mat S, B, X; // solve S*X = B; all three matrices are dense 667 MatFactorSchurStatus schurstatus; 668 PetscInt sizesol; 669 const PetscScalar *xarray; 670 671 PetscFunctionBegin; 672 PetscCall(MatFactorFactorizeSchurComplement(F)); 673 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 674 PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); 675 676 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B)); 677 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 678 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 679 PetscCall(MatBindToCPU(B, S->boundtocpu)); 680 #endif 681 switch (schurstatus) { 682 case MAT_FACTOR_SCHUR_FACTORED: 683 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X)); 684 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 685 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 686 PetscCall(MatBindToCPU(X, S->boundtocpu)); 687 #endif 688 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 689 PetscCall(MatMatSolveTranspose(S, B, X)); 690 } else { 691 PetscCall(MatMatSolve(S, B, X)); 692 } 693 break; 694 case MAT_FACTOR_SCHUR_INVERTED: 695 sizesol = mumps->id.nrhs * mumps->id.size_schur; 696 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 697 PetscCall(PetscFree(mumps->schur_sol)); 698 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 699 mumps->schur_sizesol = sizesol; 700 } 701 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, 
mumps->id.nrhs, mumps->schur_sol, &X)); 702 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 703 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 704 PetscCall(MatBindToCPU(X, S->boundtocpu)); 705 #endif 706 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 707 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 708 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 709 } else { 710 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 711 } 712 PetscCall(MatProductSetFromOptions(X)); 713 PetscCall(MatProductSymbolic(X)); 714 PetscCall(MatProductNumeric(X)); 715 716 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 717 break; 718 default: 719 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 720 } 721 // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device. 722 PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs. 723 PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs)); 724 PetscCall(MatDenseRestoreArrayRead(X, &xarray)); 725 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 726 PetscCall(MatDestroy(&B)); 727 PetscCall(MatDestroy(&X)); 728 PetscFunctionReturn(PETSC_SUCCESS); 729 } 730 731 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 732 { 733 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 734 735 PetscFunctionBegin; 736 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 if (!expansion) { /* prepare for the condensation step */ 740 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 741 /* allocate MUMPS internal array to store reduced right-hand sides */ 742 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 743 mumps->id.lredrhs = mumps->id.size_schur; 744 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 745 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 746 PetscCall(PetscFree(mumps->redrhs)); 747 PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs)); 748 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs)); 749 } 750 } else { /* prepare for the expansion step */ 751 PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */ 752 mumps->id.ICNTL(26) = 2; /* expansion phase */ 753 PetscMUMPS_c(mumps); 754 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 755 /* restore defaults */ 756 mumps->id.ICNTL(26) = -1; 757 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 758 if (mumps->id.nrhs > 1) { 759 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 760 PetscCall(PetscFree(mumps->redrhs)); 761 mumps->id.redrhs_len = 0; 762 mumps->id.lredrhs = 0; 763 mumps->sizeredrhs = 0; 764 } 765 } 766 PetscFunctionReturn(PETSC_SUCCESS); 767 } 768 769 /* 770 MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz] 771 772 input: 773 A - matrix in aij,baij or sbaij format 774 shift - 0: C style output triple; 1: Fortran style output triple. 
775 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 776 MAT_REUSE_MATRIX: only the values in v array are updated 777 output: 778 nnz - dim of r, c, and v (number of local nonzero entries of A) 779 r, c, v - row and col index, matrix values (matrix triples) 780 781 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 782 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 783 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 784 785 */ 786 787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 788 { 789 const PetscScalar *av; 790 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 791 PetscCount nz, rnz, k; 792 PetscMUMPSInt *row, *col; 793 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 794 795 PetscFunctionBegin; 796 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 797 if (reuse == MAT_INITIAL_MATRIX) { 798 nz = aa->nz; 799 ai = aa->i; 800 aj = aa->j; 801 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 802 for (PetscCount i = k = 0; i < M; i++) { 803 rnz = ai[i + 1] - ai[i]; 804 ajj = aj + ai[i]; 805 for (PetscCount j = 0; j < rnz; j++) { 806 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 807 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 808 k++; 809 } 810 } 811 mumps->val = (PetscScalar *)av; 812 mumps->irn = row; 813 mumps->jcn = col; 814 mumps->nnz = nz; 815 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 816 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 817 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 822 { 823 PetscCount nz, i, j, k, r; 824 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 825 PetscMUMPSInt *row, *col; 826 827 PetscFunctionBegin; 828 nz = a->sliidx[a->totalslices]; 829 if (reuse == MAT_INITIAL_MATRIX) { 830 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 831 for (i = k = 0; i < a->totalslices; i++) { 832 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 833 } 834 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 835 mumps->irn = row; 836 mumps->jcn = col; 837 mumps->nnz = nz; 838 mumps->val = a->val; 839 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 840 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 845 { 846 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 847 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 848 PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 849 PetscInt bs; 850 PetscMUMPSInt *row, *col; 851 852 PetscFunctionBegin; 853 if (reuse == MAT_INITIAL_MATRIX) { 854 
PetscCall(MatGetBlockSize(A, &bs)); 855 M = A->rmap->N / bs; 856 ai = aa->i; 857 aj = aa->j; 858 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 859 for (i = 0; i < M; i++) { 860 ajj = aj + ai[i]; 861 rnz = ai[i + 1] - ai[i]; 862 for (k = 0; k < rnz; k++) { 863 for (j = 0; j < bs; j++) { 864 for (m = 0; m < bs; m++) { 865 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 866 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 867 idx++; 868 } 869 } 870 } 871 } 872 mumps->irn = row; 873 mumps->jcn = col; 874 mumps->nnz = nz; 875 mumps->val = aa->a; 876 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 877 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 882 { 883 const PetscInt *ai, *aj, *ajj; 884 PetscInt bs; 885 PetscCount nz, rnz, i, j, k, m; 886 PetscMUMPSInt *row, *col; 887 PetscScalar *val; 888 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 889 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 890 #if defined(PETSC_USE_COMPLEX) 891 PetscBool isset, hermitian; 892 #endif 893 894 PetscFunctionBegin; 895 #if defined(PETSC_USE_COMPLEX) 896 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 897 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 898 #endif 899 ai = aa->i; 900 aj = aa->j; 901 PetscCall(MatGetBlockSize(A, &bs)); 902 if (reuse == MAT_INITIAL_MATRIX) { 903 const PetscCount alloc_size = aa->nz * bs2; 904 905 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 906 if (bs > 1) { 907 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 908 mumps->val = mumps->val_alloc; 909 } else { 910 mumps->val = aa->a; 911 } 912 mumps->irn = row; 913 mumps->jcn = col; 914 } else { 915 row = mumps->irn; 916 col = mumps->jcn; 917 } 918 val = mumps->val; 919 920 nz = 0; 921 if (bs > 1) { 922 for (i = 0; i < mbs; i++) { 923 rnz = ai[i + 1] - ai[i]; 924 ajj = aj + ai[i]; 925 for (j = 0; j < rnz; j++) { 926 for (k = 0; k < bs; k++) { 927 for (m = 0; m < bs; m++) { 928 if (ajj[j] > i || k >= m) { 929 if (reuse == MAT_INITIAL_MATRIX) { 930 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 931 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 932 } 933 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 934 } 935 } 936 } 937 } 938 } 939 } else if (reuse == MAT_INITIAL_MATRIX) { 940 for (i = 0; i < mbs; i++) { 941 rnz = ai[i + 1] - ai[i]; 942 ajj = aj + ai[i]; 943 for (j = 0; j < rnz; j++) { 944 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 945 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 946 nz++; 947 } 948 } 949 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 950 } else if (mumps->nest_vals) 951 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 952 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the 
mumps->val pointer */ 953 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 958 { 959 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 960 PetscCount nz, rnz, i, j; 961 const PetscScalar *av, *v1; 962 PetscScalar *val; 963 PetscMUMPSInt *row, *col; 964 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 965 PetscBool diagDense; 966 #if defined(PETSC_USE_COMPLEX) 967 PetscBool hermitian, isset; 968 #endif 969 970 PetscFunctionBegin; 971 #if defined(PETSC_USE_COMPLEX) 972 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 973 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 974 #endif 975 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 976 ai = aa->i; 977 aj = aa->j; 978 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 979 if (reuse == MAT_INITIAL_MATRIX) { 980 /* count nz in the upper triangular part of A */ 981 nz = 0; 982 if (!diagDense) { 983 for (i = 0; i < M; i++) { 984 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 985 for (j = ai[i]; j < ai[i + 1]; j++) { 986 if (aj[j] < i) continue; 987 nz++; 988 } 989 } else { 990 nz += ai[i + 1] - adiag[i]; 991 } 992 } 993 } else { 994 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 995 } 996 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 997 PetscCall(PetscMalloc1(nz, &val)); 998 mumps->nnz = nz; 999 mumps->irn = row; 1000 mumps->jcn = col; 1001 mumps->val = mumps->val_alloc = val; 1002 1003 nz = 0; 1004 if (!diagDense) { 1005 for (i = 0; i < M; i++) { 1006 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1007 for (j = ai[i]; j < ai[i + 1]; j++) { 1008 if (aj[j] < i) continue; 1009 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1010 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 1011 val[nz] = av[j]; 1012 nz++; 1013 } 1014 } else { 1015 rnz = ai[i + 1] - adiag[i]; 1016 ajj = aj + adiag[i]; 1017 v1 = av + adiag[i]; 1018 for (j = 0; j < rnz; j++) { 1019 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1020 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1021 val[nz++] = v1[j]; 1022 } 1023 } 1024 } 1025 } else { 1026 for (i = 0; i < M; i++) { 1027 rnz = ai[i + 1] - adiag[i]; 1028 ajj = aj + adiag[i]; 1029 v1 = av + adiag[i]; 1030 for (j = 0; j < rnz; j++) { 1031 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1032 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1033 val[nz++] = v1[j]; 1034 } 1035 } 1036 } 1037 } else { 1038 nz = 0; 1039 val = mumps->val; 1040 if (!diagDense) { 1041 for (i = 0; i < M; i++) { 1042 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1043 for (j = ai[i]; j < ai[i + 1]; j++) { 1044 if (aj[j] < i) continue; 1045 val[nz++] = av[j]; 1046 } 1047 } else { 1048 rnz = ai[i + 1] - adiag[i]; 1049 v1 = av + adiag[i]; 1050 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1051 } 1052 } 1053 } else { 1054 for (i = 0; i < M; i++) { 1055 rnz = ai[i + 1] - adiag[i]; 1056 v1 = av + adiag[i]; 1057 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1058 } 1059 } 1060 } 1061 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 1062 PetscFunctionReturn(PETSC_SUCCESS); 1063 } 1064 1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1066 { 1067 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 1068 PetscInt bs; 1069 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1070 PetscMUMPSInt 
*row, *col; 1071 const PetscScalar *av, *bv, *v1, *v2; 1072 PetscScalar *val; 1073 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1074 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1075 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1076 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 1077 #if defined(PETSC_USE_COMPLEX) 1078 PetscBool hermitian, isset; 1079 #endif 1080 1081 PetscFunctionBegin; 1082 #if defined(PETSC_USE_COMPLEX) 1083 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1084 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1085 #endif 1086 PetscCall(MatGetBlockSize(A, &bs)); 1087 rstart = A->rmap->rstart; 1088 ai = aa->i; 1089 aj = aa->j; 1090 bi = bb->i; 1091 bj = bb->j; 1092 av = aa->a; 1093 bv = bb->a; 1094 1095 garray = mat->garray; 1096 1097 if (reuse == MAT_INITIAL_MATRIX) { 1098 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 1099 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1100 PetscCall(PetscMalloc1(nz, &val)); 1101 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1102 mumps->irn = row; 1103 mumps->jcn = col; 1104 mumps->val = mumps->val_alloc = val; 1105 } else { 1106 val = mumps->val; 1107 } 1108 1109 jj = 0; 1110 irow = rstart; 1111 for (i = 0; i < mbs; i++) { 1112 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1113 countA = ai[i + 1] - ai[i]; 1114 countB = bi[i + 1] - bi[i]; 1115 bjj = bj + bi[i]; 1116 v1 = av + ai[i] * bs2; 1117 v2 = bv + bi[i] * bs2; 1118 1119 if (bs > 1) { 1120 /* A-part */ 1121 for (j = 0; j < countA; j++) { 1122 for (k = 0; k < bs; k++) { 1123 for (m = 0; m < bs; m++) { 1124 if (rstart + ajj[j] * bs > irow || k >= m) { 1125 if (reuse == MAT_INITIAL_MATRIX) { 1126 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1127 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 1128 } 1129 val[jj++] = v1[j * bs2 + m + k * bs]; 1130 } 1131 } 1132 } 1133 } 1134 1135 /* B-part */ 1136 for (j = 0; j < countB; j++) { 1137 for (k = 0; k < bs; k++) { 1138 for (m = 0; m < bs; m++) { 1139 if (reuse == MAT_INITIAL_MATRIX) { 1140 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1141 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 1142 } 1143 val[jj++] = v2[j * bs2 + m + k * bs]; 1144 } 1145 } 1146 } 1147 } else { 1148 /* A-part */ 1149 for (j = 0; j < countA; j++) { 1150 if (reuse == MAT_INITIAL_MATRIX) { 1151 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1152 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1153 } 1154 val[jj++] = v1[j]; 1155 } 1156 1157 /* B-part */ 1158 for (j = 0; j < countB; j++) { 1159 if (reuse == MAT_INITIAL_MATRIX) { 1160 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1161 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1162 } 1163 val[jj++] = v2[j]; 1164 } 1165 } 1166 irow += bs; 1167 } 1168 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1173 { 1174 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1175 PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 1176 PetscMUMPSInt *row, *col; 1177 const PetscScalar *av, *bv, *v1, *v2; 1178 PetscScalar *val; 1179 Mat Ad, Ao; 1180 Mat_SeqAIJ *aa; 1181 Mat_SeqAIJ *bb; 1182 1183 PetscFunctionBegin; 1184 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, 
&Ao, &garray)); 1185 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1186 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1187 1188 aa = (Mat_SeqAIJ *)Ad->data; 1189 bb = (Mat_SeqAIJ *)Ao->data; 1190 ai = aa->i; 1191 aj = aa->j; 1192 bi = bb->i; 1193 bj = bb->j; 1194 1195 rstart = A->rmap->rstart; 1196 cstart = A->cmap->rstart; 1197 1198 if (reuse == MAT_INITIAL_MATRIX) { 1199 nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 1200 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1201 PetscCall(PetscMalloc1(nz, &val)); 1202 mumps->nnz = nz; 1203 mumps->irn = row; 1204 mumps->jcn = col; 1205 mumps->val = mumps->val_alloc = val; 1206 } else { 1207 val = mumps->val; 1208 } 1209 1210 jj = 0; 1211 irow = rstart; 1212 for (i = 0; i < m; i++) { 1213 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1214 countA = ai[i + 1] - ai[i]; 1215 countB = bi[i + 1] - bi[i]; 1216 bjj = bj + bi[i]; 1217 v1 = av + ai[i]; 1218 v2 = bv + bi[i]; 1219 1220 /* A-part */ 1221 for (j = 0; j < countA; j++) { 1222 if (reuse == MAT_INITIAL_MATRIX) { 1223 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1224 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 1225 } 1226 val[jj++] = v1[j]; 1227 } 1228 1229 /* B-part */ 1230 for (j = 0; j < countB; j++) { 1231 if (reuse == MAT_INITIAL_MATRIX) { 1232 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1233 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1234 } 1235 val[jj++] = v2[j]; 1236 } 1237 irow++; 1238 } 1239 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1240 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1245 { 1246 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 1247 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 1248 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1249 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 1250 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 1251 const PetscInt bs2 = mat->bs2; 1252 PetscInt bs; 1253 PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx; 1254 PetscMUMPSInt *row, *col; 1255 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 1256 PetscScalar *val; 1257 1258 PetscFunctionBegin; 1259 PetscCall(MatGetBlockSize(A, &bs)); 1260 if (reuse == MAT_INITIAL_MATRIX) { 1261 nz = bs2 * (aa->nz + bb->nz); 1262 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1263 PetscCall(PetscMalloc1(nz, &val)); 1264 mumps->nnz = nz; 1265 mumps->irn = row; 1266 mumps->jcn = col; 1267 mumps->val = mumps->val_alloc = val; 1268 } else { 1269 val = mumps->val; 1270 } 1271 1272 jj = 0; 1273 irow = rstart; 1274 for (i = 0; i < mbs; i++) { 1275 countA = ai[i + 1] - ai[i]; 1276 countB = bi[i + 1] - bi[i]; 1277 ajj = aj + ai[i]; 1278 bjj = bj + bi[i]; 1279 v1 = av + bs2 * ai[i]; 1280 v2 = bv + bs2 * bi[i]; 1281 1282 idx = 0; 1283 /* A-part */ 1284 for (k = 0; k < countA; k++) { 1285 for (j = 0; j < bs; j++) { 1286 for (n = 0; n < bs; n++) { 1287 if (reuse == MAT_INITIAL_MATRIX) { 1288 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1289 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 1290 } 1291 val[jj++] = v1[idx++]; 1292 } 1293 } 1294 } 1295 1296 idx = 0; 1297 /* B-part */ 1298 for (k = 0; k < countB; k++) { 1299 for (j = 0; j < bs; j++) { 1300 for (n = 0; n < bs; n++) { 1301 if (reuse == MAT_INITIAL_MATRIX) { 
1302 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1303 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 1304 } 1305 val[jj++] = v2[idx++]; 1306 } 1307 } 1308 } 1309 irow += bs; 1310 } 1311 PetscFunctionReturn(PETSC_SUCCESS); 1312 } 1313 1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1315 { 1316 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1317 PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 1318 PetscMUMPSInt *row, *col; 1319 const PetscScalar *av, *bv, *v1, *v2; 1320 PetscScalar *val; 1321 Mat Ad, Ao; 1322 Mat_SeqAIJ *aa; 1323 Mat_SeqAIJ *bb; 1324 #if defined(PETSC_USE_COMPLEX) 1325 PetscBool hermitian, isset; 1326 #endif 1327 1328 PetscFunctionBegin; 1329 #if defined(PETSC_USE_COMPLEX) 1330 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1331 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1332 #endif 1333 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1334 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1335 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1336 1337 aa = (Mat_SeqAIJ *)Ad->data; 1338 bb = (Mat_SeqAIJ *)Ao->data; 1339 ai = aa->i; 1340 aj = aa->j; 1341 bi = bb->i; 1342 bj = bb->j; 1343 PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL)); 1344 rstart = A->rmap->rstart; 1345 1346 if (reuse == MAT_INITIAL_MATRIX) { 1347 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1348 nzb = 0; /* num of upper triangular entries in mat->B */ 1349 for (i = 0; i < m; i++) { 1350 nza += (ai[i + 1] - adiag[i]); 1351 countB = bi[i + 1] - bi[i]; 1352 bjj = bj + bi[i]; 1353 for (j = 0; j < countB; j++) { 1354 if (garray[bjj[j]] > rstart) nzb++; 1355 } 1356 } 1357 1358 nz = nza + nzb; /* total nz of upper triangular part of mat */ 1359 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1360 PetscCall(PetscMalloc1(nz, &val)); 1361 mumps->nnz = nz; 1362 mumps->irn = row; 1363 mumps->jcn = col; 1364 mumps->val = mumps->val_alloc = val; 1365 } else { 1366 val = mumps->val; 1367 } 1368 1369 jj = 0; 1370 irow = rstart; 1371 for (i = 0; i < m; i++) { 1372 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 1373 v1 = av + adiag[i]; 1374 countA = ai[i + 1] - adiag[i]; 1375 countB = bi[i + 1] - bi[i]; 1376 bjj = bj + bi[i]; 1377 v2 = bv + bi[i]; 1378 1379 /* A-part */ 1380 for (j = 0; j < countA; j++) { 1381 if (reuse == MAT_INITIAL_MATRIX) { 1382 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1383 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1384 } 1385 val[jj++] = v1[j]; 1386 } 1387 1388 /* B-part */ 1389 for (j = 0; j < countB; j++) { 1390 if (garray[bjj[j]] > rstart) { 1391 if (reuse == MAT_INITIAL_MATRIX) { 1392 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1393 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1394 } 1395 val[jj++] = v2[j]; 1396 } 1397 } 1398 irow++; 1399 } 1400 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1401 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1402 PetscFunctionReturn(PETSC_SUCCESS); 1403 } 1404 1405 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1406 { 1407 const PetscScalar *av; 1408 const PetscInt M = A->rmap->n; 1409 PetscCount i; 1410 PetscMUMPSInt *row, *col; 1411 Vec v; 1412 1413 PetscFunctionBegin; 1414 
PetscCall(MatDiagonalGetDiagonal(A, &v)); 1415 PetscCall(VecGetArrayRead(v, &av)); 1416 if (reuse == MAT_INITIAL_MATRIX) { 1417 PetscCall(PetscMalloc2(M, &row, M, &col)); 1418 for (i = 0; i < M; i++) { 1419 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1420 col[i] = row[i]; 1421 } 1422 mumps->val = (PetscScalar *)av; 1423 mumps->irn = row; 1424 mumps->jcn = col; 1425 mumps->nnz = M; 1426 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1427 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1428 PetscCall(VecRestoreArrayRead(v, &av)); 1429 PetscFunctionReturn(PETSC_SUCCESS); 1430 } 1431 1432 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1433 { 1434 PetscScalar *v; 1435 const PetscInt m = A->rmap->n, N = A->cmap->N; 1436 PetscInt lda; 1437 PetscCount i, j; 1438 PetscMUMPSInt *row, *col; 1439 1440 PetscFunctionBegin; 1441 PetscCall(MatDenseGetArray(A, &v)); 1442 PetscCall(MatDenseGetLDA(A, &lda)); 1443 if (reuse == MAT_INITIAL_MATRIX) { 1444 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1445 for (i = 0; i < m; i++) { 1446 col[i] = 0; 1447 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1448 } 1449 for (j = 1; j < N; j++) { 1450 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1451 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1452 } 1453 if (lda == m) mumps->val = v; 1454 else { 1455 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1456 mumps->val_alloc = mumps->val; 1457 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1458 } 1459 mumps->irn = row; 1460 mumps->jcn = col; 1461 mumps->nnz = m * N; 1462 } else { 1463 if (lda == m && !mumps->nest_vals) mumps->val = v; 1464 else { 1465 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1466 } 1467 } 1468 PetscCall(MatDenseRestoreArray(A, &v)); 1469 PetscFunctionReturn(PETSC_SUCCESS); 1470 } 1471 1472 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 1473 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 1474 // and its rows and columns permuted 1475 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 1476 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 1477 { 1478 Mat A; 1479 PetscScalar s[2]; 1480 PetscBool isTrans, isHTrans, compare; 1481 1482 PetscFunctionBegin; 1483 do { 1484 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1485 if (isTrans) { 1486 PetscCall(MatTransposeGetMat(*sub, &A)); 1487 isHTrans = PETSC_FALSE; 1488 } else { 1489 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1490 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 1491 } 1492 compare = (PetscBool)(isTrans || isHTrans); 1493 if (compare) { 1494 if (vshift && vscale) { 1495 PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec 
*)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1496 if (!*conjugate) { 1497 *vshift += s[0] * *vscale; 1498 *vscale *= s[1]; 1499 } else { 1500 *vshift += PetscConj(s[0]) * *vscale; 1501 *vscale *= PetscConj(s[1]); 1502 } 1503 } 1504 if (swap) *swap = (PetscBool)!*swap; 1505 if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 1506 *sub = A; 1507 } 1508 } while (compare); 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1513 { 1514 Mat **mats; 1515 PetscInt nr, nc; 1516 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1517 1518 PetscFunctionBegin; 1519 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1520 if (reuse == MAT_INITIAL_MATRIX) { 1521 PetscMUMPSInt *irns, *jcns; 1522 PetscScalar *vals; 1523 PetscCount totnnz, cumnnz, maxnnz; 1524 PetscInt *pjcns_w, Mbs = 0; 1525 IS *rows, *cols; 1526 PetscInt **rows_idx, **cols_idx; 1527 1528 cumnnz = 0; 1529 maxnnz = 0; 1530 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1531 for (PetscInt r = 0; r < nr; r++) { 1532 for (PetscInt c = 0; c < nc; c++) { 1533 Mat sub = mats[r][c]; 1534 1535 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1536 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1537 if (sub) { 1538 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1539 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 1540 MatInfo info; 1541 1542 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 1543 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1544 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1545 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1546 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1547 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1548 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1549 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1550 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1551 1552 if (chol) { 1553 if (r == c) { 1554 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1555 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1556 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1557 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1558 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1559 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1560 } else { 1561 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1562 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1563 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1564 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1565 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1566 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1567 } 1568 } else { 1569 if 
(isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1570 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1571 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1572 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1573 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1574 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1575 } 1576 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1577 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1578 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1579 cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 1580 maxnnz = PetscMax(maxnnz, info.nz_used); 1581 } 1582 } 1583 } 1584 1585 /* Allocate total COO */ 1586 totnnz = cumnnz; 1587 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1588 PetscCall(PetscMalloc1(totnnz, &vals)); 1589 1590 /* Handle rows and column maps 1591 We directly map rows and use an SF for the columns */ 1592 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1593 PetscCall(MatNestGetISs(A, rows, cols)); 1594 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1595 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1596 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1597 else (void)maxnnz; 1598 1599 cumnnz = 0; 1600 for (PetscInt r = 0; r < nr; r++) { 1601 for (PetscInt c = 0; c < nc; c++) { 1602 Mat sub = mats[r][c]; 1603 const PetscInt *ridx = rows_idx[r]; 1604 const PetscInt *cidx = cols_idx[c]; 1605 PetscScalar vscale = 1.0, vshift = 0.0; 1606 PetscInt rst, size, bs; 1607 PetscSF csf; 1608 PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 1609 PetscLayout cmap; 1610 PetscInt innz; 1611 1612 mumps->nest_vals_start[r * nc + c] = cumnnz; 1613 if (c == r) { 1614 PetscCall(ISGetSize(rows[r], &size)); 1615 if (!mumps->nest_convert_to_triples[r * nc + c]) { 1616 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1617 } 1618 PetscCall(MatGetBlockSize(sub, &bs)); 1619 Mbs += size / bs; 1620 } 1621 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1622 1623 /* Extract inner blocks if needed */ 1624 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 1625 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1626 1627 /* Get column layout to map off-process columns */ 1628 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1629 1630 /* Get row start to map on-process rows */ 1631 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1632 1633 /* Directly use the mumps datastructure and use C ordering for now */ 1634 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1635 1636 /* Swap the role of rows and columns indices for transposed blocks 1637 since we need values with global final ordering */ 1638 if (swap) { 1639 cidx = rows_idx[r]; 1640 ridx = cols_idx[c]; 1641 } 1642 1643 /* Communicate column indices 1644 This could have been done with a single SF but it would have complicated the code a lot. 
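The SF built below translates each block-local column index into the MatNest global column numbering: its graph is defined by the block's column layout, and the broadcast replaces every entry of pjcns_w[] with the owning process' cols_idx[] value.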
1645 But since we do it only once, we pay the price of setting up an SF for each block */ 1646 if (PetscDefined(USE_64BIT_INDICES)) { 1647 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1648 } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 1649 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1650 PetscCall(PetscIntCast(mumps->nnz, &innz)); 1651 PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1652 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1653 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1654 PetscCall(PetscSFDestroy(&csf)); 1655 1656 /* Import indices: use direct map for rows and mapped indices for columns */ 1657 if (swap) { 1658 for (PetscInt k = 0; k < mumps->nnz; k++) { 1659 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1660 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1661 } 1662 } else { 1663 for (PetscInt k = 0; k < mumps->nnz; k++) { 1664 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1665 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1666 } 1667 } 1668 1669 /* Import values to full COO */ 1670 if (conjugate) { /* conjugate the entries */ 1671 PetscScalar *v = vals + cumnnz; 1672 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 1673 } else if (vscale != 1.0) { 1674 PetscScalar *v = vals + cumnnz; 1675 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 1676 } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1677 1678 /* Shift new starting point and sanity check */ 1679 cumnnz += mumps->nnz; 1680 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1681 1682 /* Free scratch memory */ 1683 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1684 PetscCall(PetscFree(mumps->val_alloc)); 1685 mumps->val = NULL; 1686 mumps->nnz = 0; 1687 } 1688 } 1689 if (mumps->id.ICNTL(15) == 1) { 1690 if (Mbs != A->rmap->N) { 1691 PetscMPIInt rank, size; 1692 1693 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1694 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1695 if (rank == 0) { 1696 PetscInt shift = 0; 1697 1698 PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 1699 PetscCall(PetscFree(mumps->id.blkptr)); 1700 PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 1701 mumps->id.blkptr[0] = 1; 1702 for (PetscInt i = 0; i < size; ++i) { 1703 for (PetscInt r = 0; r < nr; r++) { 1704 Mat sub = mats[r][r]; 1705 const PetscInt *ranges; 1706 PetscInt bs; 1707 1708 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1709 PetscCall(MatGetOwnershipRanges(sub, &ranges)); 1710 PetscCall(MatGetBlockSize(sub, &bs)); 1711 for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 1712 shift += (ranges[i + 1] - ranges[i]) / bs; 1713 } 1714 } 1715 } 1716 } else mumps->id.ICNTL(15) = 0; 1717 } 1718 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1719 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1720 for 
(PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1721 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1722 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1723 mumps->nest_vals_start[nr * nc] = cumnnz; 1724 1725 /* Set pointers for final MUMPS data structure */ 1726 mumps->nest_vals = vals; 1727 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1728 mumps->val = vals; 1729 mumps->irn = irns; 1730 mumps->jcn = jcns; 1731 mumps->nnz = cumnnz; 1732 } else { 1733 PetscScalar *oval = mumps->nest_vals; 1734 for (PetscInt r = 0; r < nr; r++) { 1735 for (PetscInt c = 0; c < nc; c++) { 1736 PetscBool conjugate = PETSC_FALSE; 1737 Mat sub = mats[r][c]; 1738 PetscScalar vscale = 1.0, vshift = 0.0; 1739 PetscInt midx = r * nc + c; 1740 1741 if (!mumps->nest_convert_to_triples[midx]) continue; 1742 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL)); 1743 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1744 mumps->val = oval + mumps->nest_vals_start[midx]; 1745 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1746 if (conjugate) { 1747 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1748 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]); 1749 } else if (vscale != 1.0) { 1750 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1751 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale; 1752 } 1753 } 1754 } 1755 mumps->val = oval; 1756 } 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1761 { 1762 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1763 1764 PetscFunctionBegin; 1765 PetscCall(PetscFree(mumps->id.isol_loc)); 1766 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1767 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1768 PetscCall(VecDestroy(&mumps->b_seq)); 1769 PetscCall(VecDestroy(&mumps->x_seq)); 1770 PetscCall(PetscFree(mumps->id.perm_in)); 1771 PetscCall(PetscFree(mumps->id.blkvar)); 1772 PetscCall(PetscFree(mumps->id.blkptr)); 1773 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1774 PetscCall(PetscFree(mumps->val_alloc)); 1775 PetscCall(PetscFree(mumps->info)); 1776 PetscCall(PetscFree(mumps->ICNTL_pre)); 1777 PetscCall(PetscFree(mumps->CNTL_pre)); 1778 PetscCall(MatMumpsResetSchur_Private(mumps)); 1779 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1780 mumps->id.job = JOB_END; 1781 PetscMUMPS_c(mumps); 1782 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1783 if (mumps->mumps_comm != MPI_COMM_NULL) { 1784 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1785 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1786 } 1787 } 1788 PetscCall(MatMumpsFreeInternalID(&mumps->id)); 1789 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1790 if (mumps->use_petsc_omp_support) { 1791 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1792 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1793 
PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1794 } 1795 #endif 1796 PetscCall(PetscFree(mumps->ia_alloc)); 1797 PetscCall(PetscFree(mumps->ja_alloc)); 1798 PetscCall(PetscFree(mumps->recvcount)); 1799 PetscCall(PetscFree(mumps->reqs)); 1800 PetscCall(PetscFree(mumps->irhs_loc)); 1801 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1802 PetscCall(PetscFree(mumps->nest_vals)); 1803 PetscCall(PetscFree(A->data)); 1804 1805 /* clear composed functions */ 1806 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1807 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1808 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1809 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1810 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1811 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1812 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1813 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1814 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1815 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1816 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1817 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1818 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1819 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1820 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL)); 1821 PetscFunctionReturn(PETSC_SUCCESS); 1822 } 1823 1824 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
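When -mat_mumps_use_omp_threads is used (omp_comm_size > 1), only the OMP master ranks call MUMPS, so the RHS rows owned by the other ranks of an omp_comm are first gathered onto their master with MPI_Gatherv before being handed to MUMPS through rhs_loc[].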
*/ 1825 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1826 { 1827 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1828 const PetscMPIInt ompsize = mumps->omp_comm_size; 1829 PetscInt i, m, M, rstart; 1830 1831 PetscFunctionBegin; 1832 PetscCall(MatGetSize(A, &M, NULL)); 1833 PetscCall(MatGetLocalSize(A, &m, NULL)); 1834 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1835 if (ompsize == 1) { 1836 if (!mumps->irhs_loc) { 1837 mumps->nloc_rhs = (PetscMUMPSInt)m; 1838 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1839 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1840 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */ 1841 } 1842 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1843 } else { 1844 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1845 const PetscInt *ranges; 1846 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1847 MPI_Group petsc_group, omp_group; 1848 PetscScalar *recvbuf = NULL; 1849 1850 if (mumps->is_omp_master) { 1851 /* Lazily initialize the omp stuff for distributed rhs */ 1852 if (!mumps->irhs_loc) { 1853 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1854 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1855 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1856 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1857 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1858 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1859 1860 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1861 mumps->nloc_rhs = 0; 1862 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1863 for (j = 0; j < ompsize; j++) { 1864 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1865 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1866 } 1867 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1868 for (j = k = 0; j < ompsize; j++) { 1869 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */ 1870 } 1871 1872 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1873 PetscCallMPI(MPI_Group_free(&petsc_group)); 1874 PetscCallMPI(MPI_Group_free(&omp_group)); 1875 } 1876 1877 /* Realloc buffers when current nrhs is bigger than what we have met */ 1878 if (nrhs > mumps->max_nrhs) { 1879 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1880 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1881 mumps->max_nrhs = nrhs; 1882 } 1883 1884 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1885 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1886 mumps->rhs_disps[0] = 0; 1887 for (j = 1; j < ompsize; j++) { 1888 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1889 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1890 } 1891 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
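With a single RHS no per-rank re-arrangement is needed afterwards, so gathering straight into rhs_loc[] avoids the extra copy from rhs_recvbuf[].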
Single rhs is common in Ax=b */ 1892 } 1893 1894 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1895 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1896 1897 if (mumps->is_omp_master) { 1898 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1899 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1900 for (j = 0; j < ompsize; j++) { 1901 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1902 dst = dstbase; 1903 for (i = 0; i < nrhs; i++) { 1904 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1905 src += mumps->rhs_nrow[j]; 1906 dst += mumps->nloc_rhs; 1907 } 1908 dstbase += mumps->rhs_nrow[j]; 1909 } 1910 } 1911 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1912 } 1913 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1914 } 1915 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1916 mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 1917 mumps->id.lrhs_loc = mumps->nloc_rhs; 1918 mumps->id.irhs_loc = mumps->irhs_loc; 1919 PetscFunctionReturn(PETSC_SUCCESS); 1920 } 1921 1922 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1923 { 1924 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1925 const PetscScalar *barray = NULL; 1926 PetscScalar *array; 1927 IS is_iden, is_petsc; 1928 PetscInt i; 1929 PetscBool second_solve = PETSC_FALSE; 1930 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1931 1932 PetscFunctionBegin; 1933 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1934 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1935 &cite1)); 1936 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1937 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1938 &cite2)); 1939 1940 PetscCall(VecFlag(x, A->factorerrortype)); 1941 if (A->factorerrortype) { 1942 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 mumps->id.nrhs = 1; 1947 if (mumps->petsc_size > 1) { 1948 if (mumps->ICNTL20 == 10) { 1949 mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */ 1950 PetscCall(VecGetArrayRead(b, &barray)); 1951 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray)); 1952 } else { 1953 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential b_seq vector */ 1954 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1955 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1956 if (!mumps->myid) { 1957 PetscCall(VecGetArray(mumps->b_seq, &array)); 1958 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1959 } 1960 } 1961 } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */ 1962 mumps->id.ICNTL(20) = 0; 1963 PetscCall(VecCopy(b, x)); 1964 PetscCall(VecGetArray(x, &array)); 1965 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1966 } 1967 1968 /* 1969 handle condensation step of Schur complement (if any) 1970 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1971 According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase. 1972 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
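When ICNTL(26) is out of range we thus set it to 1 below so that the MUMPS solve only condenses the RHS onto the Schur variables, and the expansion step handled afterwards by MatMumpsHandleSchur_Private() recovers the full solution.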
1973 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1974 */ 1975 if (mumps->id.size_schur > 0) { 1976 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1977 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1978 second_solve = PETSC_TRUE; 1979 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 1980 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1981 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1982 } 1983 1984 mumps->id.job = JOB_SOLVE; 1985 PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs 1986 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1987 1988 /* handle expansion step of Schur complement (if any) */ 1989 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1990 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 1991 PetscCall(MatMumpsSolveSchur_Private(A)); 1992 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i); 1993 } 1994 1995 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1996 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1997 /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1998 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1999 } 2000 if (!mumps->scat_sol) { /* create scatter scat_sol */ 2001 PetscInt *isol2_loc = NULL; 2002 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 2003 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 2004 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 2005 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 2006 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 2007 PetscCall(ISDestroy(&is_iden)); 2008 PetscCall(ISDestroy(&is_petsc)); 2009 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 2010 } 2011 2012 PetscScalar *xarray; 2013 PetscCall(VecGetArray(mumps->x_seq, &xarray)); 2014 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray)); 2015 PetscCall(VecRestoreArray(mumps->x_seq, &xarray)); 2016 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2017 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2018 2019 if (mumps->ICNTL20 == 10) { // distributed RHS 2020 PetscCall(VecRestoreArrayRead(b, &barray)); 2021 } else if (!mumps->myid) { // centralized RHS 2022 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 2023 } 2024 } else { 2025 // id.rhs has the solution in mumps precision 2026 PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array)); 2027 PetscCall(VecRestoreArray(x, &array)); 2028 } 2029 2030 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 2035 { 2036 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2037 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2038 2039 PetscFunctionBegin; 2040 mumps->id.ICNTL(9) = 0; 2041 PetscCall(MatSolve_MUMPS(A, b, x)); 2042 mumps->id.ICNTL(9) = value; 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 2047 { 2048 Mat Bt = NULL; 2049 PetscBool denseX, denseB, flg, flgT; 2050 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2051 PetscInt i, nrhs, M, nrhsM; 2052 PetscScalar *array; 2053 const PetscScalar *barray; 2054 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 2055 PetscMUMPSInt *isol_loc, *isol_loc_save; 2056 PetscScalar *sol_loc; 2057 void *sol_loc_save; 2058 PetscCount sol_loc_len_save; 2059 IS is_to, is_from; 2060 PetscInt k, proc, j, m, myrstart; 2061 const PetscInt *rstart; 2062 Vec v_mpi, msol_loc; 2063 VecScatter scat_sol; 2064 Vec b_seq; 2065 VecScatter scat_rhs; 2066 PetscScalar *aa; 2067 PetscInt spnr, *ia, *ja; 2068 Mat_MPIAIJ *b = NULL; 2069 2070 PetscFunctionBegin; 2071 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 2072 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 2073 2074 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 2075 2076 if (denseB) { 2077 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 2078 mumps->id.ICNTL(20) = 0; /* dense RHS */ 2079 } else { /* sparse B */ 2080 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 2081 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 2082 PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 2083 PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 2084 /* input B is transpose of actual RHS matrix, 2085 because mumps requires sparse compressed COLUMN storage! 
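(the CSR row pointers and column indices of Bt are exactly the CSC column pointers and row indices of the actual RHS, which is what MUMPS expects in irhs_ptr[] and irhs_sparse[]).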
See MatMatTransposeSolve_MUMPS() */ 2086 PetscCall(MatTransposeGetMat(B, &Bt)); 2087 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 2088 } 2089 2090 PetscCall(MatGetSize(B, &M, &nrhs)); 2091 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 2092 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 2093 mumps->id.lrhs = (PetscMUMPSInt)M; 2094 2095 if (mumps->petsc_size == 1) { // handle this easy case specially and return early 2096 PetscScalar *aa; 2097 PetscInt spnr, *ia, *ja; 2098 PetscBool second_solve = PETSC_FALSE; 2099 2100 PetscCall(MatDenseGetArray(X, &array)); 2101 if (denseB) { 2102 /* copy B to X */ 2103 PetscCall(MatDenseGetArrayRead(B, &barray)); 2104 PetscCall(PetscArraycpy(array, barray, nrhsM)); 2105 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2106 } else { /* sparse B */ 2107 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 2108 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2109 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2110 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2111 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2112 } 2113 PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2114 2115 /* handle condensation step of Schur complement (if any) */ 2116 if (mumps->id.size_schur > 0) { 2117 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 2118 second_solve = PETSC_TRUE; 2119 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 2120 mumps->id.ICNTL(26) = 1; /* condensation phase, i.e, to solve id.redrhs */ 2121 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 2122 } 2123 2124 mumps->id.job = JOB_SOLVE; 2125 PetscMUMPS_c(mumps); 2126 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2127 2128 /* handle expansion step of Schur complement (if any) */ 2129 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 2130 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 2131 PetscCall(MatMumpsSolveSchur_Private(A)); 2132 for (j = 0; j < nrhs; ++j) 2133 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs); 2134 } 2135 2136 if (!denseB) { /* sparse B, restore ia, ja */ 2137 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 2138 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2139 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2140 } 2141 2142 // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X. 2143 PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array)); 2144 PetscCall(MatDenseRestoreArray(X, &array)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
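More precisely, a sparse RHS and a dense RHS with ICNTL(20)=0 must be centralized on rank 0, whereas a dense RHS may stay distributed when ICNTL(20)=10; both dense paths are handled below.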
*/ 2149 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 2150 2151 /* create msol_loc to hold mumps local solution */ 2152 isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */ 2153 sol_loc_save = mumps->id.sol_loc; 2154 sol_loc_len_save = mumps->id.sol_loc_len; 2155 mumps->id.isol_loc = NULL; // an init state 2156 mumps->id.sol_loc = NULL; 2157 mumps->id.sol_loc_len = 0; 2158 2159 lsol_loc = mumps->id.lsol_loc; 2160 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 2161 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 2162 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2163 mumps->id.isol_loc = isol_loc; 2164 2165 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 2166 2167 if (denseB) { 2168 if (mumps->ICNTL20 == 10) { 2169 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 2170 PetscCall(MatDenseGetArrayRead(B, &barray)); 2171 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc 2172 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2173 PetscCall(MatGetLocalSize(B, &m, NULL)); 2174 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X 2175 } else { 2176 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 2177 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 2178 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 2179 0, re-arrange B into desired order, which is a local operation. 2180 */ 2181 2182 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 2183 /* wrap dense rhs matrix B into a vector v_mpi */ 2184 PetscCall(MatGetLocalSize(B, &m, NULL)); 2185 PetscCall(MatDenseGetArrayRead(B, &barray)); 2186 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2187 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2188 2189 /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! 
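On rank 0 an index set is built so that the scatter reorders the block-distributed entries of v_mpi (grouped by owning process, then by RHS column) into the column-major layout of the centralized b_seq.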
*/ 2190 if (!mumps->myid) { 2191 PetscInt *idx; 2192 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 2193 PetscCall(PetscMalloc1(nrhsM, &idx)); 2194 PetscCall(MatGetOwnershipRanges(B, &rstart)); 2195 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 2196 for (j = 0; j < nrhs; j++) { 2197 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 2198 } 2199 } 2200 2201 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 2202 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 2203 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 2204 } else { 2205 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 2206 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 2207 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 2208 } 2209 2210 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 2211 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2212 PetscCall(ISDestroy(&is_to)); 2213 PetscCall(ISDestroy(&is_from)); 2214 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2215 2216 if (!mumps->myid) { /* define rhs on the host */ 2217 PetscCall(VecGetArrayRead(b_seq, &barray)); 2218 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2219 PetscCall(VecRestoreArrayRead(b_seq, &barray)); 2220 } 2221 } 2222 } else { /* sparse B */ 2223 b = (Mat_MPIAIJ *)Bt->data; 2224 2225 /* wrap dense X into a vector v_mpi */ 2226 PetscCall(MatGetLocalSize(X, &m, NULL)); 2227 PetscCall(MatDenseGetArrayRead(X, &barray)); 2228 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2229 PetscCall(MatDenseRestoreArrayRead(X, &barray)); 2230 2231 if (!mumps->myid) { 2232 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 2233 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2234 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2235 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2236 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2237 } else { 2238 mumps->id.irhs_ptr = NULL; 2239 mumps->id.irhs_sparse = NULL; 2240 mumps->id.nz_rhs = 0; 2241 if (mumps->id.rhs_sparse_len) { 2242 PetscCall(PetscFree(mumps->id.rhs_sparse)); 2243 mumps->id.rhs_sparse_len = 0; 2244 } 2245 } 2246 } 2247 2248 /* solve phase */ 2249 mumps->id.job = JOB_SOLVE; 2250 PetscMUMPS_c(mumps); 2251 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2252 2253 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 2254 PetscCall(MatDenseGetArray(X, &array)); 2255 PetscCall(VecPlaceArray(v_mpi, array)); 2256 2257 /* create scatter scat_sol */ 2258 PetscCall(MatGetOwnershipRanges(X, &rstart)); 2259 /* iidx: index for scatter mumps solution to PETSc X */ 2260 2261 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 2262 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 2263 for (i = 0; i < lsol_loc; i++) { 2264 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 2265 2266 for (proc = 0; proc < mumps->petsc_size; proc++) { 2267 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 2268 myrstart = rstart[proc]; 2269 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 2270 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 2271 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 2272 break; 2273 } 2274 } 2275 2276 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 2277 } 2278 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 2279 PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[] 2280 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 2281 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2282 PetscCall(ISDestroy(&is_from)); 2283 PetscCall(ISDestroy(&is_to)); 2284 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2285 PetscCall(MatDenseRestoreArray(X, &array)); 2286 2287 if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers 2288 mumps->id.sol_loc_len = 0; 2289 PetscCall(PetscFree(mumps->id.sol_loc)); 2290 } 2291 2292 // restore old values 2293 mumps->id.sol_loc = sol_loc_save; 2294 mumps->id.sol_loc_len = sol_loc_len_save; 2295 mumps->id.isol_loc = isol_loc_save; 2296 2297 PetscCall(PetscFree2(sol_loc, isol_loc)); 2298 PetscCall(PetscFree(idxx)); 2299 PetscCall(VecDestroy(&msol_loc)); 2300 PetscCall(VecDestroy(&v_mpi)); 2301 if (!denseB) { 2302 if (!mumps->myid) { 2303 b = (Mat_MPIAIJ *)Bt->data; 2304 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 2305 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2306 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2307 } 2308 } else { 2309 if (mumps->ICNTL20 == 0) { 2310 PetscCall(VecDestroy(&b_seq)); 2311 PetscCall(VecScatterDestroy(&scat_rhs)); 2312 } 2313 } 2314 PetscCall(VecScatterDestroy(&scat_sol)); 2315 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2316 PetscFunctionReturn(PETSC_SUCCESS); 2317 } 2318 2319 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 2320 { 2321 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2322 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2323 2324 PetscFunctionBegin; 2325 mumps->id.ICNTL(9) = 0; 2326 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2327 mumps->id.ICNTL(9) = value; 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 2332 { 2333 PetscBool flg; 2334 Mat B; 2335 2336 PetscFunctionBegin; 2337 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2338 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 2339 2340 /* Create B=Bt^T that uses Bt's data structure */ 2341 PetscCall(MatCreateTranspose(Bt, &B)); 2342 2343 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2344 PetscCall(MatDestroy(&B)); 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } 2347 2348 #if !defined(PETSC_USE_COMPLEX) 2349 /* 2350 input: 2351 F: numeric factor 2352 output: 2353 nneg: total number of negative pivots 2354 nzero: total number of zero pivots 2355 npos: (global dimension of F) - nneg - nzero 2356 */ 2357 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 2358 { 2359 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2360 PetscMPIInt size; 2361 2362 PetscFunctionBegin; 2363 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 2364 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 2365 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 2366 2367 if (nneg) *nneg = mumps->id.INFOG(12); 2368 if (nzero || npos) { 2369 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2370 if (nzero) *nzero = mumps->id.INFOG(28); 2371 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 2372 } 2373 PetscFunctionReturn(PETSC_SUCCESS); 2374 } 2375 #endif 2376 2377 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 2378 { 2379 PetscMPIInt nreqs; 2380 PetscMUMPSInt *irn, *jcn; 2381 PetscMPIInt count; 2382 PetscCount totnnz, remain; 2383 const PetscInt osize = mumps->omp_comm_size; 2384 PetscScalar *val; 2385 2386 PetscFunctionBegin; 2387 if (osize > 1) { 2388 if (reuse == MAT_INITIAL_MATRIX) { 2389 /* master first gathers counts of nonzeros to receive */ 2390 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 2391 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 2392 2393 /* Then each computes number of send/recvs */ 2394 if (mumps->is_omp_master) { 2395 /* Start from 1 since self communication is not done in MPI */ 2396 nreqs = 0; 2397 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 2398 } else { 2399 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 2400 } 2401 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 2402 2403 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 2404 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 2405 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 2406 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
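Each message is therefore sent in slices of at most PETSC_MPI_INT_MAX entries, using matching nonblocking sends and receives that all share a single tag.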
2407 */ 2408 nreqs = 0; /* counter for actual send/recvs */ 2409 if (mumps->is_omp_master) { 2410 totnnz = 0; 2411 2412 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 2413 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 2414 PetscCall(PetscMalloc1(totnnz, &val)); 2415 2416 /* Self communication */ 2417 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 2418 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 2419 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 2420 2421 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 2422 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 2423 PetscCall(PetscFree(mumps->val_alloc)); 2424 mumps->nnz = totnnz; 2425 mumps->irn = irn; 2426 mumps->jcn = jcn; 2427 mumps->val = mumps->val_alloc = val; 2428 2429 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 2430 jcn += mumps->recvcount[0]; 2431 val += mumps->recvcount[0]; 2432 2433 /* Remote communication */ 2434 for (PetscMPIInt i = 1; i < osize; i++) { 2435 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2436 remain = mumps->recvcount[i] - count; 2437 while (count > 0) { 2438 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2439 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2440 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2441 irn += count; 2442 jcn += count; 2443 val += count; 2444 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2445 remain -= count; 2446 } 2447 } 2448 } else { 2449 irn = mumps->irn; 2450 jcn = mumps->jcn; 2451 val = mumps->val; 2452 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2453 remain = mumps->nnz - count; 2454 while (count > 0) { 2455 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2456 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2457 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2458 irn += count; 2459 jcn += count; 2460 val += count; 2461 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2462 remain -= count; 2463 } 2464 } 2465 } else { 2466 nreqs = 0; 2467 if (mumps->is_omp_master) { 2468 val = mumps->val + mumps->recvcount[0]; 2469 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 2470 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2471 remain = mumps->recvcount[i] - count; 2472 while (count > 0) { 2473 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2474 val += count; 2475 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2476 remain -= count; 2477 } 2478 } 2479 } else { 2480 val = mumps->val; 2481 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2482 remain = mumps->nnz - count; 2483 while (count > 0) { 2484 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2485 val += count; 2486 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2487 remain -= count; 2488 } 2489 } 2490 } 2491 
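/* wait for all sliced sends/receives posted above to complete before the irn/jcn/val buffers are used */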
PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2492 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 2493 } 2494 PetscFunctionReturn(PETSC_SUCCESS); 2495 } 2496 2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2498 { 2499 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2500 2501 PetscFunctionBegin; 2502 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { 2503 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2504 PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2505 PetscFunctionReturn(PETSC_SUCCESS); 2506 } 2507 2508 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2509 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2510 2511 /* numerical factorization phase */ 2512 mumps->id.job = JOB_FACTNUMERIC; 2513 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2514 if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2515 } else { 2516 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2517 } 2518 2519 if (F->schur) { 2520 const PetscScalar *array; 2521 MUMPS_INT size = mumps->id.size_schur; 2522 PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2523 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2524 PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2525 } 2526 2527 if (mumps->id.ICNTL(22)) PetscCall(PetscStrncpy(mumps->id.ooc_prefix, ((PetscObject)F)->prefix, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_prefix))); 2528 2529 PetscMUMPS_c(mumps); 2530 if (mumps->id.INFOG(1) < 0) { 2531 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2532 if (mumps->id.INFOG(1) == -10) { 2533 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2534 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2535 } else if (mumps->id.INFOG(1) == -13) { 2536 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2537 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2538 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2539 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2540 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2541 } else { 2542 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2543 F->factorerrortype = MAT_FACTOR_OTHER; 2544 } 2545 } 2546 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2547 2548 F->assembled =
PETSC_TRUE; 2549 2550 if (F->schur) { /* reset Schur status to unfactored */ 2551 #if defined(PETSC_HAVE_CUDA) 2552 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2553 #endif 2554 PetscScalar *array; 2555 PetscCall(MatDenseGetArray(F->schur, &array)); 2556 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2557 PetscCall(MatDenseRestoreArray(F->schur, &array)); 2558 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2559 mumps->id.ICNTL(19) = 2; 2560 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2561 } 2562 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2563 } 2564 2565 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2566 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2567 2568 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2569 // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2570 // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2571 // which the solution (in SOL_loc) is available on the local processor. 2572 // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2573 // normally have the same contents for each of these calls. The only exception is the case of 2574 // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2575 // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 2576 if (mumps->petsc_size > 1) { 2577 PetscInt lsol_loc; 2578 PetscScalar *array; 2579 2580 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2581 if (mumps->x_seq) { 2582 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2583 PetscCall(PetscFree(mumps->id.isol_loc)); 2584 PetscCall(VecDestroy(&mumps->x_seq)); 2585 } 2586 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2587 PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2588 PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2589 PetscCall(VecGetArray(mumps->x_seq, &array)); 2590 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2591 PetscCall(VecRestoreArray(mumps->x_seq, &array)); 2592 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2593 } 2594 PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 2595 PetscFunctionReturn(PETSC_SUCCESS); 2596 } 2597 2598 /* Sets MUMPS options from the options database */ 2599 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2600 { 2601 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2602 PetscReal cntl; 2603 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2604 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2605 PetscBool flg = PETSC_FALSE; 2606 PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2607 void *arr; 2608 2609 PetscFunctionBegin; 2610 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2611 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2612 PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2613 PetscInt nthreads = 0; 2614 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2615 PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2616 PetscMUMPSInt nblk, *blkvar, *blkptr; 2617 2618 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2619 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2620 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2621 2622 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2623 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2624 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2625 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2626 if (mumps->use_petsc_omp_support) { 2627 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2628 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2629 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2630 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2631 #else 2632 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2633 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2634 #endif 2635 } else { 2636 mumps->omp_comm = PETSC_COMM_SELF; 2637 mumps->mumps_comm = mumps->petsc_comm; 2638 mumps->is_omp_master = PETSC_TRUE; 2639 } 2640 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2641 mumps->reqs = NULL; 2642 mumps->tag = 0; 2643 2644 if (mumps->mumps_comm != MPI_COMM_NULL) { 2645 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2646 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2647 MPI_Comm comm; 2648 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2649 mumps->mumps_comm = comm; 2650 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2651 } 2652 2653 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2654 mumps->id.job = JOB_INIT; 2655 mumps->id.par = 1; /* host participates factorizaton and solve */ 2656 mumps->id.sym = mumps->sym; 2657 2658 size = mumps->id.size_schur; 2659 arr = mumps->id.schur; 2660 listvar_schur = mumps->id.listvar_schur; 2661 nblk = mumps->id.nblk; 2662 blkvar = mumps->id.blkvar; 2663 blkptr = mumps->id.blkptr; 2664 if (PetscDefined(USE_DEBUG)) { 2665 for (PetscInt i = 0; i < size; i++) 2666 PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! 
%" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 2667 A->rmap->N); 2668 } 2669 2670 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2671 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2672 PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar"); 2673 PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2674 2675 PetscMUMPS_c(mumps); 2676 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2677 2678 /* set PETSc-MUMPS default options - override MUMPS default */ 2679 mumps->id.ICNTL(3) = 0; 2680 mumps->id.ICNTL(4) = 0; 2681 if (mumps->petsc_size == 1) { 2682 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2683 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2684 } else { 2685 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2686 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2687 } 2688 if (nblk && blkptr) { 2689 mumps->id.ICNTL(15) = 1; 2690 mumps->id.nblk = nblk; 2691 mumps->id.blkvar = blkvar; 2692 mumps->id.blkptr = blkptr; 2693 } else mumps->id.ICNTL(15) = 0; 2694 2695 /* restore cached ICNTL and CNTL values */ 2696 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2697 for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2698 2699 PetscCall(PetscFree(mumps->ICNTL_pre)); 2700 PetscCall(PetscFree(mumps->CNTL_pre)); 2701 2702 if (schur) { 2703 mumps->id.size_schur = size; 2704 mumps->id.schur_lld = size; 2705 mumps->id.schur = arr; 2706 mumps->id.listvar_schur = listvar_schur; 2707 if (mumps->petsc_size > 1) { 2708 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2709 2710 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2711 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2712 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2713 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2714 } else { 2715 if (F->factortype == MAT_FACTOR_LU) { 2716 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2717 } else { 2718 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2719 } 2720 } 2721 mumps->id.ICNTL(26) = -1; 2722 } 2723 2724 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2725 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
2726 */ 2727 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2728 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2729 2730 mumps->scat_rhs = NULL; 2731 mumps->scat_sol = NULL; 2732 } 2733 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2734 if (flg) mumps->id.ICNTL(1) = icntl; 2735 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2736 if (flg) mumps->id.ICNTL(2) = icntl; 2737 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2738 if (flg) mumps->id.ICNTL(3) = icntl; 2739 2740 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2741 if (flg) mumps->id.ICNTL(4) = icntl; 2742 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2743 2744 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2745 if (flg) mumps->id.ICNTL(6) = icntl; 2746 2747 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2748 if (flg) { 2749 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2750 mumps->id.ICNTL(7) = icntl; 2751 } 2752 2753 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2754 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2755 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2756 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2757 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2758 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2759 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2760 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2761 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2762 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2763 if (flg) { 2764 if (mumps->id.ICNTL(15) < 0) 
PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2765 else if (mumps->id.ICNTL(15) > 0) { 2766 const PetscInt *bsizes; 2767 PetscInt nblocks, p, *blkptr = NULL; 2768 PetscMPIInt *recvcounts, *displs, n; 2769 PetscMPIInt rank, size = 0; 2770 2771 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2772 flg = PETSC_TRUE; 2773 for (p = 0; p < nblocks; ++p) { 2774 if (bsizes[p] > 1) break; 2775 } 2776 if (p == nblocks) flg = PETSC_FALSE; 2777 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2778 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2779 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2780 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2781 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2782 PetscCall(PetscMPIIntCast(nblocks, &n)); 2783 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2784 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2785 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2786 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2787 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2788 if (rank == 0) { 2789 blkptr[0] = 1; 2790 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2791 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2792 } 2793 PetscCall(PetscFree2(recvcounts, displs)); 2794 PetscCall(PetscFree(blkptr)); 2795 } 2796 } 2797 } 2798 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2799 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2800 PetscCall(MatDestroy(&F->schur)); 2801 PetscCall(MatMumpsResetSchur_Private(mumps)); 2802 } 2803 2804 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2805 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2806 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2807 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2808 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
2809 In short, we could not use distributed RHS until MPICH v4.0b1, or until we enabled a workaround in mumps-5.6.2+ 2810 */ 2811 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2812 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2813 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2814 #endif 2815 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2816 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2817 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2818 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2819 #endif 2820 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2821 2822 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), &flg)); 2823 if (flg && mumps->id.ICNTL(22) != 1) mumps->id.ICNTL(22) = 0; // MUMPS treats values other than 1 as 0. Normalize it so we can safely use 'if (mumps->id.ICNTL(22))' 2824 if (mumps->id.ICNTL(22)) { 2825 // MUMPS will use the /tmp directory if -mat_mumps_ooc_tmpdir is not set by the user. 2826 // We don't provide option -mat_mumps_ooc_prefix, as we use F's prefix as OOC_PREFIX, which is set later during MatFactorNumeric_MUMPS() to also handle cases where users enable OOC via MatMumpsSetICNTL().
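// As a minimal illustration (the directory name below is hypothetical), out-of-core factorization can be requested
// entirely from the options database, e.g.
//   -mat_mumps_icntl_22 1 -mat_mumps_ooc_tmpdir /scratch/mumps_ooc
// or programmatically, before the numeric factorization, with
//   MatMumpsSetIcntl(F, 22, 1) and MatMumpsSetOocTmpDir(F, "/scratch/mumps_ooc").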
2827 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "Out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir), NULL)); 2828 } 2829 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2830 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2831 if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2832 2833 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2834 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2835 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2836 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2837 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2838 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2839 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2840 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2841 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2842 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2843 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2844 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2845 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2846 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2847 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), 
NULL)); 2848 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2849 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2850 2851 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2852 if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2853 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2854 if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2855 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2856 if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2857 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2858 if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2859 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2860 if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2861 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2862 if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2863 2864 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2865 if (ninfo) { 2866 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2867 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2868 mumps->ninfo = ninfo; 2869 for (i = 0; i < ninfo; i++) { 2870 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2871 mumps->info[i] = info[i]; 2872 } 2873 } 2874 PetscOptionsEnd(); 2875 PetscFunctionReturn(PETSC_SUCCESS); 2876 } 2877 2878 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2879 { 2880 PetscFunctionBegin; 2881 if (mumps->id.INFOG(1) < 0) { 2882 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2883 if (mumps->id.INFOG(1) == -6) { 2884 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2885 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2886 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2887 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2888 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2889 } else { 2890 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2891 F->factorerrortype = MAT_FACTOR_OTHER; 2892 } 2893 } 2894 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2895 PetscFunctionReturn(PETSC_SUCCESS); 2896 } 2897 2898 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2899 { 2900 Mat_MUMPS *mumps 
= (Mat_MUMPS *)F->data; 2901 Vec b; 2902 const PetscInt M = A->rmap->N; 2903 2904 PetscFunctionBegin; 2905 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2906 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2907 PetscFunctionReturn(PETSC_SUCCESS); 2908 } 2909 2910 /* Set MUMPS options from the options database */ 2911 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2912 2913 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2914 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2915 2916 /* analysis phase */ 2917 mumps->id.job = JOB_FACTSYMBOLIC; 2918 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2919 switch (mumps->id.ICNTL(18)) { 2920 case 0: /* centralized assembled matrix input */ 2921 if (!mumps->myid) { 2922 mumps->id.nnz = mumps->nnz; 2923 mumps->id.irn = mumps->irn; 2924 mumps->id.jcn = mumps->jcn; 2925 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2926 if (r && mumps->id.ICNTL(7) == 7) { 2927 mumps->id.ICNTL(7) = 1; 2928 if (!mumps->myid) { 2929 const PetscInt *idx; 2930 PetscInt i; 2931 2932 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2933 PetscCall(ISGetIndices(r, &idx)); 2934 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2935 PetscCall(ISRestoreIndices(r, &idx)); 2936 } 2937 } 2938 } 2939 break; 2940 case 3: /* distributed assembled matrix input (size>1) */ 2941 mumps->id.nnz_loc = mumps->nnz; 2942 mumps->id.irn_loc = mumps->irn; 2943 mumps->id.jcn_loc = mumps->jcn; 2944 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2945 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2946 PetscCall(MatCreateVecs(A, NULL, &b)); 2947 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2948 PetscCall(VecDestroy(&b)); 2949 } 2950 break; 2951 } 2952 PetscMUMPS_c(mumps); 2953 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2954 2955 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2956 F->ops->solve = MatSolve_MUMPS; 2957 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2958 F->ops->matsolve = MatMatSolve_MUMPS; 2959 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2960 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2961 2962 mumps->matstruc = SAME_NONZERO_PATTERN; 2963 PetscFunctionReturn(PETSC_SUCCESS); 2964 } 2965 2966 /* Note the PETSc r and c permutations are ignored */ 2967 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2968 { 2969 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2970 Vec b; 2971 const PetscInt M = A->rmap->N; 2972 2973 PetscFunctionBegin; 2974 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2975 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2976 PetscFunctionReturn(PETSC_SUCCESS); 2977 } 2978 2979 /* Set MUMPS options from the options database */ 2980 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2981 2982 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2983 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2984 2985 /* analysis phase */ 2986 mumps->id.job = JOB_FACTSYMBOLIC; 2987 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2988 switch (mumps->id.ICNTL(18)) { 2989 case 0: /* centralized assembled matrix input */ 2990 if (!mumps->myid) { 2991 mumps->id.nnz = mumps->nnz; 2992 mumps->id.irn = mumps->irn; 2993 mumps->id.jcn = mumps->jcn; 2994 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2995 } 2996 break; 2997 case 3: /* distributed assembled matrix input (size>1) */ 2998 mumps->id.nnz_loc = mumps->nnz; 2999 mumps->id.irn_loc = mumps->irn; 3000 mumps->id.jcn_loc = mumps->jcn; 3001 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 3002 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 3003 PetscCall(MatCreateVecs(A, NULL, &b)); 3004 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 3005 PetscCall(VecDestroy(&b)); 3006 } 3007 break; 3008 } 3009 PetscMUMPS_c(mumps); 3010 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3011 3012 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 3013 F->ops->solve = MatSolve_MUMPS; 3014 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 3015 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3016 3017 mumps->matstruc = SAME_NONZERO_PATTERN; 3018 PetscFunctionReturn(PETSC_SUCCESS); 3019 } 3020 3021 /* Note the PETSc r permutation and factor info are ignored */ 3022 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 3023 { 3024 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3025 Vec b; 3026 const PetscInt M = A->rmap->N; 3027 3028 PetscFunctionBegin; 3029 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 3030 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 3031 PetscFunctionReturn(PETSC_SUCCESS); 3032 } 3033 3034 /* Set MUMPS options from the options database */ 3035 PetscCall(MatSetFromOptions_MUMPS(F, A)); 3036 3037 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 3038 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 3039 3040 /* analysis phase */ 3041 mumps->id.job = JOB_FACTSYMBOLIC; 3042 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 3043 switch (mumps->id.ICNTL(18)) { 3044 case 0: /* centralized assembled matrix input */ 3045 if (!mumps->myid) { 3046 mumps->id.nnz = mumps->nnz; 3047 mumps->id.irn = mumps->irn; 3048 mumps->id.jcn = mumps->jcn; 3049 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 3050 } 3051 break; 3052 case 3: /* distributed assembled matrix input (size>1) */ 3053 mumps->id.nnz_loc = mumps->nnz; 3054 mumps->id.irn_loc = mumps->irn; 3055 mumps->id.jcn_loc = mumps->jcn; 3056 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 3057 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 3058 PetscCall(MatCreateVecs(A, NULL, &b)); 3059 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 3060 PetscCall(VecDestroy(&b)); 3061 } 3062 break; 3063 } 3064 PetscMUMPS_c(mumps); 3065 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3066 3067 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 3068 F->ops->solve = MatSolve_MUMPS; 3069 F->ops->solvetranspose = MatSolve_MUMPS; 3070 F->ops->matsolve = MatMatSolve_MUMPS; 3071 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 3072 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3073 #if defined(PETSC_USE_COMPLEX) 3074 F->ops->getinertia = NULL; 3075 #else 3076 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 3077 #endif 3078 3079 mumps->matstruc = SAME_NONZERO_PATTERN; 3080 PetscFunctionReturn(PETSC_SUCCESS); 3081 } 3082 3083 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 3084 { 3085 PetscBool isascii; 3086 PetscViewerFormat format; 3087 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3088 3089 PetscFunctionBegin; 3090 /* check if matrix is mumps type */ 3091 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 3092 3093 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 3094 if (isascii) { 3095 PetscCall(PetscViewerGetFormat(viewer, &format)); 3096 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3097 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 3098 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3099 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 3100 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 3101 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 3102 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 3103 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 3104 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 3105 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 3106 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 3107 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 3108 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 3109 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 3110 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 3111 if (mumps->id.ICNTL(11) > 0) { 3112 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4))); 3113 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5))); 3114 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6))); 3115 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8))); 3116 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error 
estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9))); 3117 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11))); 3118 } 3119 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 3120 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 3121 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 3122 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 3123 /* ICNTL(15-17) not used */ 3124 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 3125 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 3126 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 3127 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 3128 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 3129 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 3130 3131 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 3132 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 3133 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 3134 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 3135 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 3136 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 3137 3138 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 3139 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 3140 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 3141 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 3142 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 3143 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 3144 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 3145 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 3146 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49))); 3147 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 3148 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) 
(options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 3149 3150 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1))); 3151 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2))); 3152 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3))); 3153 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4))); 3154 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5))); 3155 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7))); 3156 3157 /* information local to each processor */ 3158 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 3159 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 3160 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1))); 3161 PetscCall(PetscViewerFlush(viewer)); 3162 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 3163 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2))); 3164 PetscCall(PetscViewerFlush(viewer)); 3165 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 3166 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3))); 3167 PetscCall(PetscViewerFlush(viewer)); 3168 3169 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 3170 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 3171 PetscCall(PetscViewerFlush(viewer)); 3172 3173 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 3174 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 3175 PetscCall(PetscViewerFlush(viewer)); 3176 3177 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 3178 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 3179 PetscCall(PetscViewerFlush(viewer)); 3180 3181 if (mumps->ninfo && mumps->ninfo <= 80) { 3182 PetscInt i; 3183 for (i = 0; i < mumps->ninfo; i++) { 3184 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 3185 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 3186 PetscCall(PetscViewerFlush(viewer)); 3187 } 3188 } 3189 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 3190 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 3191 3192 if (mumps->myid == 0) { /* information from the host */ 3193 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1))); 3194 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2))); 3195 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3))); 3196 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34))); 3197 3198 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 3199 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 3200 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 3201 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 3202 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 3203 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 3204 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 3205 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 3206 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 3207 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 3208 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 3209 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 3210 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 3211 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 3212 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 3213 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 3214 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 3215 PetscCall(PetscViewerASCIIPrintf(viewer, " 
INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 3216 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 3217 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 3218 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 3219 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 3220 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 3221 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 3222 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 3223 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 3224 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 3225 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 3226 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 3227 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 3228 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 3229 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 3230 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 3231 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 3232 } 3233 } 3234 } 3235 PetscFunctionReturn(PETSC_SUCCESS); 3236 } 3237 3238 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 3239 { 3240 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3241 3242 PetscFunctionBegin; 3243 info->block_size = 1.0; 3244 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3245 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3246 info->nz_unneeded = 0.0; 3247 info->assemblies = 0.0; 3248 info->mallocs = 0.0; 3249 info->memory = 0.0; 3250 info->fill_ratio_given = 0; 3251 info->fill_ratio_needed = 0; 3252 info->factor_mallocs = 0; 3253 PetscFunctionReturn(PETSC_SUCCESS); 3254 } 3255 3256 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 3257 { 3258 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3259 const PetscScalar *arr; 3260 const PetscInt *idxs; 3261 PetscInt size, i; 3262 3263 PetscFunctionBegin; 3264 PetscCall(ISGetLocalSize(is, &size)); 3265 /* Schur complement matrix */ 3266 PetscCall(MatDestroy(&F->schur)); 3267 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 3268 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 3269 // don't allocate mumps->id.schur[] now as its precision is yet to know 3270 PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 3271 PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 3272 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 3273 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 3274 3275 /* MUMPS expects Fortran style indices */ 3276 PetscCall(PetscFree(mumps->id.listvar_schur)); 3277 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 3278 PetscCall(ISGetIndices(is, &idxs)); 3279 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 3280 PetscCall(ISRestoreIndices(is, &idxs)); 3281 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 3282 if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; 3283 else mumps->ICNTL26 = -1; 3284 PetscFunctionReturn(PETSC_SUCCESS); 3285 } 3286 3287 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 3288 { 3289 Mat St; 3290 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3291 PetscScalar *array; 3292 PetscInt i, j, N = mumps->id.size_schur; 3293 3294 PetscFunctionBegin; 3295 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! 
Call MatFactorSetSchurIS() to enable it"); 3296 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 3297 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 3298 PetscCall(MatSetType(St, MATDENSE)); 3299 PetscCall(MatSetUp(St)); 3300 PetscCall(MatDenseGetArray(St, &array)); 3301 if (!mumps->sym) { /* MUMPS always return a full matrix */ 3302 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 3303 for (i = 0; i < N; i++) { 3304 for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3305 } 3306 } else { /* stored by columns */ 3307 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3308 } 3309 } else { /* either full or lower-triangular (not packed) */ 3310 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 3311 for (i = 0; i < N; i++) { 3312 for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3313 } 3314 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 3315 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3316 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 3317 for (i = 0; i < N; i++) { 3318 for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3319 } 3320 } 3321 } 3322 PetscCall(MatDenseRestoreArray(St, &array)); 3323 *S = St; 3324 PetscFunctionReturn(PETSC_SUCCESS); 3325 } 3326 3327 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 3328 { 3329 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3330 3331 PetscFunctionBegin; 3332 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 3333 PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 3334 for (i = 0; i < nICNTL_pre; ++i) 3335 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 3336 if (i == nICNTL_pre) { /* not already cached */ 3337 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 3338 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 3339 mumps->ICNTL_pre[0]++; 3340 } 3341 mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 3342 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 3343 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 3344 PetscFunctionReturn(PETSC_SUCCESS); 3345 } 3346 3347 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 3348 { 3349 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3350 3351 PetscFunctionBegin; 3352 if (mumps->id.job == JOB_NULL) { 3353 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 3354 *ival = 0; 3355 for (i = 0; i < nICNTL_pre; ++i) { 3356 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 3357 } 3358 } else *ival = mumps->id.ICNTL(icntl); 3359 PetscFunctionReturn(PETSC_SUCCESS); 3360 } 3361 3362 /*@ 3363 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3364 3365 Logically Collective 3366 3367 Input Parameters: 3368 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3369 . 
icntl - index of MUMPS parameter array `ICNTL()` 3370 - ival - value of MUMPS `ICNTL(icntl)` 3371 3372 Options Database Key: 3373 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 3374 3375 Level: beginner 3376 3377 Note: 3378 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3379 3380 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3381 @*/ 3382 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 3383 { 3384 PetscFunctionBegin; 3385 PetscValidType(F, 1); 3386 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3387 PetscValidLogicalCollectiveInt(F, icntl, 2); 3388 PetscValidLogicalCollectiveInt(F, ival, 3); 3389 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3390 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 3391 PetscFunctionReturn(PETSC_SUCCESS); 3392 } 3393 3394 /*@ 3395 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3396 3397 Logically Collective 3398 3399 Input Parameters: 3400 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3401 - icntl - index of MUMPS parameter array ICNTL() 3402 3403 Output Parameter: 3404 . ival - value of MUMPS ICNTL(icntl) 3405 3406 Level: beginner 3407 3408 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3409 @*/ 3410 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 3411 { 3412 PetscFunctionBegin; 3413 PetscValidType(F, 1); 3414 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3415 PetscValidLogicalCollectiveInt(F, icntl, 2); 3416 PetscAssertPointer(ival, 3); 3417 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3418 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3419 PetscFunctionReturn(PETSC_SUCCESS); 3420 } 3421 3422 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 3423 { 3424 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3425 3426 PetscFunctionBegin; 3427 if (mumps->id.job == JOB_NULL) { 3428 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 3429 for (i = 0; i < nCNTL_pre; ++i) 3430 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3431 if (i == nCNTL_pre) { 3432 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3433 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3434 mumps->CNTL_pre[0]++; 3435 } 3436 mumps->CNTL_pre[1 + 2 * i] = icntl; 3437 mumps->CNTL_pre[2 + 2 * i] = val; 3438 } else ID_CNTL_SET(mumps->id, icntl, val); 3439 PetscFunctionReturn(PETSC_SUCCESS); 3440 } 3441 3442 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3443 { 3444 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3445 3446 PetscFunctionBegin; 3447 if (mumps->id.job == JOB_NULL) { 3448 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3449 *val = 0.0; 3450 for (i = 0; i < nCNTL_pre; ++i) { 3451 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 3452 } 3453 } else *val = ID_CNTL_GET(mumps->id, icntl); 3454 PetscFunctionReturn(PETSC_SUCCESS); 3455 } 3456 3457 /*@C 3458 MatMumpsSetOocTmpDir - Set MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc> 3459 3460 Logically Collective 3461 3462 Input Parameters: 3463 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`. 3464 - tmpdir - temporary directory for out-of-core facility. 3465 3466 Level: beginner 3467 3468 Note: 3469 To make it effective, this routine must be called before the numeric factorization, i.e., `PCSetUp()`. 3470 If `ooc_tmpdir` is not set, MUMPS will also check the environment variable `MUMPS_OOC_TMPDIR`. But if neither was defined, it will use /tmp by default. 3471 3472 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3473 @*/ 3474 PetscErrorCode MatMumpsSetOocTmpDir(Mat F, const char *tmpdir) 3475 { 3476 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3477 3478 PetscFunctionBegin; 3479 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 3480 PetscValidType(F, 1); 3481 PetscCall(PetscStrncpy(mumps->id.ooc_tmpdir, tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir))); 3482 PetscFunctionReturn(PETSC_SUCCESS); 3483 } 3484 3485 /*@C 3486 MatMumpsGetOocTmpDir - Get MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc> 3487 3488 Logically Collective 3489 3490 Input Parameter: 3491 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`. 3492 3493 Output Parameter: 3494 . tmpdir - temporary directory for out-of-core facility. 3495 3496 Level: beginner 3497 3498 Note: 3499 The returned string is read-only and user should not try to change it. 
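   Example Usage:
   A minimal sketch, assuming `F` is a MUMPS factor matrix obtained as described above.
.vb
   const char *dir;

   PetscCall(MatMumpsGetOocTmpDir(F, &dir));
   PetscCall(PetscPrintf(PETSC_COMM_SELF, "MUMPS out-of-core directory is %s\n", dir));
.ve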
3500 3501 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3502 @*/ 3503 PetscErrorCode MatMumpsGetOocTmpDir(Mat F, const char **tmpdir) 3504 { 3505 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3506 3507 PetscFunctionBegin; 3508 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 3509 PetscValidType(F, 1); 3510 if (tmpdir) *tmpdir = mumps->id.ooc_tmpdir; 3511 PetscFunctionReturn(PETSC_SUCCESS); 3512 } 3513 3514 /*@ 3515 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3516 3517 Logically Collective 3518 3519 Input Parameters: 3520 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3521 . icntl - index of MUMPS parameter array `CNTL()` 3522 - val - value of MUMPS `CNTL(icntl)` 3523 3524 Options Database Key: 3525 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3526 3527 Level: beginner 3528 3529 Note: 3530 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3531 3532 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3533 @*/ 3534 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3535 { 3536 PetscFunctionBegin; 3537 PetscValidType(F, 1); 3538 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3539 PetscValidLogicalCollectiveInt(F, icntl, 2); 3540 PetscValidLogicalCollectiveReal(F, val, 3); 3541 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3542 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3543 PetscFunctionReturn(PETSC_SUCCESS); 3544 } 3545 3546 /*@ 3547 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3548 3549 Logically Collective 3550 3551 Input Parameters: 3552 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3553 - icntl - index of MUMPS parameter array CNTL() 3554 3555 Output Parameter: 3556 . 
val - value of MUMPS CNTL(icntl) 3557 3558 Level: beginner 3559 3560 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3561 @*/ 3562 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3563 { 3564 PetscFunctionBegin; 3565 PetscValidType(F, 1); 3566 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3567 PetscValidLogicalCollectiveInt(F, icntl, 2); 3568 PetscAssertPointer(val, 3); 3569 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3570 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3571 PetscFunctionReturn(PETSC_SUCCESS); 3572 } 3573 3574 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3575 { 3576 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3577 3578 PetscFunctionBegin; 3579 *info = mumps->id.INFO(icntl); 3580 PetscFunctionReturn(PETSC_SUCCESS); 3581 } 3582 3583 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3584 { 3585 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3586 3587 PetscFunctionBegin; 3588 *infog = mumps->id.INFOG(icntl); 3589 PetscFunctionReturn(PETSC_SUCCESS); 3590 } 3591 3592 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3593 { 3594 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3595 3596 PetscFunctionBegin; 3597 *rinfo = ID_RINFO_GET(mumps->id, icntl); 3598 PetscFunctionReturn(PETSC_SUCCESS); 3599 } 3600 3601 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3602 { 3603 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3604 3605 PetscFunctionBegin; 3606 *rinfog = ID_RINFOG_GET(mumps->id, icntl); 3607 PetscFunctionReturn(PETSC_SUCCESS); 3608 } 3609 3610 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3611 { 3612 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3613 3614 PetscFunctionBegin; 3615 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3616 *size = 0; 3617 *array = NULL; 3618 if (!mumps->myid) { 3619 *size = mumps->id.INFOG(28); 3620 PetscCall(PetscMalloc1(*size, array)); 3621 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3622 } 3623 PetscFunctionReturn(PETSC_SUCCESS); 3624 } 3625 3626 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3627 { 3628 Mat Bt = NULL, Btseq = NULL; 3629 PetscBool flg; 3630 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3631 PetscScalar *aa; 3632 PetscInt spnr, *ia, *ja, M, nrhs; 3633 3634 PetscFunctionBegin; 3635 PetscAssertPointer(spRHS, 2); 3636 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3637 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3638 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3639 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3640 3641 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3642 3643 if (mumps->petsc_size > 1) 
{ 3644 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3645 Btseq = b->A; 3646 } else { 3647 Btseq = Bt; 3648 } 3649 3650 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3651 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3652 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3653 mumps->id.rhs = NULL; 3654 3655 if (!mumps->myid) { 3656 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3657 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3658 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3659 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3660 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3661 } else { 3662 mumps->id.irhs_ptr = NULL; 3663 mumps->id.irhs_sparse = NULL; 3664 mumps->id.nz_rhs = 0; 3665 if (mumps->id.rhs_sparse_len) { 3666 PetscCall(PetscFree(mumps->id.rhs_sparse)); 3667 mumps->id.rhs_sparse_len = 0; 3668 } 3669 } 3670 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3671 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3672 3673 /* solve phase */ 3674 mumps->id.job = JOB_SOLVE; 3675 PetscMUMPS_c(mumps); 3676 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3677 3678 if (!mumps->myid) { 3679 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3680 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3681 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3682 } 3683 PetscFunctionReturn(PETSC_SUCCESS); 3684 } 3685 3686 /*@ 3687 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3688 3689 Logically Collective 3690 3691 Input Parameter: 3692 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3693 3694 Output Parameter: 3695 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3696 3697 Level: beginner 3698 3699 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3700 @*/ 3701 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3702 { 3703 PetscFunctionBegin; 3704 PetscValidType(F, 1); 3705 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3706 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3707 PetscFunctionReturn(PETSC_SUCCESS); 3708 } 3709 3710 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3711 { 3712 Mat spRHS; 3713 3714 PetscFunctionBegin; 3715 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3716 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3717 PetscCall(MatDestroy(&spRHS)); 3718 PetscFunctionReturn(PETSC_SUCCESS); 3719 } 3720 3721 /*@ 3722 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3723 3724 Logically Collective 3725 3726 Input Parameter: 3727 . 
F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3728 3729 Output Parameter: 3730 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3731 3732 Level: beginner 3733 3734 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3735 @*/ 3736 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3737 { 3738 PetscBool flg; 3739 3740 PetscFunctionBegin; 3741 PetscValidType(F, 1); 3742 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3743 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3744 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3745 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3746 PetscFunctionReturn(PETSC_SUCCESS); 3747 } 3748 3749 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3750 { 3751 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3752 3753 PetscFunctionBegin; 3754 if (nblk) { 3755 PetscAssertPointer(blkptr, 4); 3756 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3757 PetscCall(PetscFree(mumps->id.blkptr)); 3758 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3759 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3760 // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3761 // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 3762 // mumps->id.ICNTL(15) = 1; 3763 if (blkvar) { 3764 PetscCall(PetscFree(mumps->id.blkvar)); 3765 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3766 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3767 } 3768 } else { 3769 PetscCall(PetscFree(mumps->id.blkptr)); 3770 PetscCall(PetscFree(mumps->id.blkvar)); 3771 // mumps->id.ICNTL(15) = 0; 3772 mumps->id.nblk = 0; 3773 } 3774 PetscFunctionReturn(PETSC_SUCCESS); 3775 } 3776 3777 /*@ 3778 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3779 3780 Not collective, only relevant on the first process of the MPI communicator 3781 3782 Input Parameters: 3783 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3784 . nblk - the number of blocks 3785 . 
blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3786 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3787 3788 Level: advanced 3789 3790 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3791 @*/ 3792 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3793 { 3794 PetscFunctionBegin; 3795 PetscValidType(F, 1); 3796 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3797 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3798 PetscFunctionReturn(PETSC_SUCCESS); 3799 } 3800 3801 /*@ 3802 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3803 3804 Logically Collective 3805 3806 Input Parameters: 3807 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3808 - icntl - index of MUMPS parameter array INFO() 3809 3810 Output Parameter: 3811 . ival - value of MUMPS INFO(icntl) 3812 3813 Level: beginner 3814 3815 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3816 @*/ 3817 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3818 { 3819 PetscFunctionBegin; 3820 PetscValidType(F, 1); 3821 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3822 PetscAssertPointer(ival, 3); 3823 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3824 PetscFunctionReturn(PETSC_SUCCESS); 3825 } 3826 3827 /*@ 3828 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3829 3830 Logically Collective 3831 3832 Input Parameters: 3833 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3834 - icntl - index of MUMPS parameter array INFOG() 3835 3836 Output Parameter: 3837 . 
ival - value of MUMPS INFOG(icntl) 3838 3839 Level: beginner 3840 3841 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3842 @*/ 3843 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3844 { 3845 PetscFunctionBegin; 3846 PetscValidType(F, 1); 3847 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3848 PetscAssertPointer(ival, 3); 3849 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3850 PetscFunctionReturn(PETSC_SUCCESS); 3851 } 3852 3853 /*@ 3854 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3855 3856 Logically Collective 3857 3858 Input Parameters: 3859 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3860 - icntl - index of MUMPS parameter array RINFO() 3861 3862 Output Parameter: 3863 . val - value of MUMPS RINFO(icntl) 3864 3865 Level: beginner 3866 3867 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3868 @*/ 3869 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3870 { 3871 PetscFunctionBegin; 3872 PetscValidType(F, 1); 3873 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3874 PetscAssertPointer(val, 3); 3875 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3876 PetscFunctionReturn(PETSC_SUCCESS); 3877 } 3878 3879 /*@ 3880 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3881 3882 Logically Collective 3883 3884 Input Parameters: 3885 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3886 - icntl - index of MUMPS parameter array RINFOG() 3887 3888 Output Parameter: 3889 . val - value of MUMPS RINFOG(icntl) 3890 3891 Level: beginner 3892 3893 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3894 @*/ 3895 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3896 { 3897 PetscFunctionBegin; 3898 PetscValidType(F, 1); 3899 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3900 PetscAssertPointer(val, 3); 3901 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3902 PetscFunctionReturn(PETSC_SUCCESS); 3903 } 3904 3905 /*@ 3906 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3907 3908 Logically Collective 3909 3910 Input Parameter: 3911 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3912 3913 Output Parameters: 3914 + size - local size of the array. 
          The size of the array is non-zero only on MPI rank 0
- array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function, and the user is responsible
          for freeing it.

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

  Works with `MATAIJ` and `MATSBAIJ` matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
  See details below.

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
. -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
                       Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
. -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
. -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
. -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
. -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
. -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
. -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
. -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
. -mat_mumps_icntl_49 - ICNTL(49): compact the work array at the end of the factorization phase
. -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
                                   The default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so using it
  with a Hermitian matrix will generate an error.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
  the failure with
.vb
          KSPGetPC(ksp,&pc);
          PCFactorGetMatrix(pc,&mat);
          MatMumpsGetInfo(mat,....);
          MatMumpsGetInfog(mat,....); etc.
.ve
  Or run with `-ksp_error_if_not_converged`, in which case the program is stopped and the information is printed in the error message.

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS to be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of nonzeros, but must have fewer than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. It requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  There are two modes to run MUMPS/PETSc with OpenMP:
.vb
   Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
.ve

.vb
   Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16".
.ve

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
  (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
  processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (these assignments
  might come from CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on
  those CPUs. On a multi-socket compute node, MPI rank mapping matters. Using the example above, suppose your compute node has two sockets, you
  interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1), and you bind MPI ranks to cores. Then with
  -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of its cores from socket 0 and half from socket 1, which
  hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the problem does not occur. Therefore, when you use
  -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS.
So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 4050 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 4051 calls `omp_set_num_threads`(m) internally before calling MUMPS. 4052 4053 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 4054 4055 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 4056 M*/ 4057 4058 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 4059 { 4060 PetscFunctionBegin; 4061 *type = MATSOLVERMUMPS; 4062 PetscFunctionReturn(PETSC_SUCCESS); 4063 } 4064 4065 /* MatGetFactor for Seq and MPI AIJ matrices */ 4066 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 4067 { 4068 Mat B; 4069 Mat_MUMPS *mumps; 4070 PetscBool isSeqAIJ, isDiag, isDense; 4071 PetscMPIInt size; 4072 4073 PetscFunctionBegin; 4074 #if defined(PETSC_USE_COMPLEX) 4075 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4076 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4077 *F = NULL; 4078 PetscFunctionReturn(PETSC_SUCCESS); 4079 } 4080 #endif 4081 /* Create the factorization matrix */ 4082 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4083 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 4084 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4085 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4086 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4087 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4088 PetscCall(MatSetUp(B)); 4089 4090 PetscCall(PetscNew(&mumps)); 4091 4092 B->ops->view = MatView_MUMPS; 4093 B->ops->getinfo = MatGetInfo_MUMPS; 4094 4095 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4096 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4097 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4098 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4099 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4100 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4101 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4102 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4103 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4105 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4106 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", 
MatMumpsGetNullPivots_MUMPS)); 4107 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4108 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4109 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4110 4111 if (ftype == MAT_FACTOR_LU) { 4112 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4113 B->factortype = MAT_FACTOR_LU; 4114 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4115 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4116 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4117 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 4118 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4119 mumps->sym = 0; 4120 } else { 4121 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4122 B->factortype = MAT_FACTOR_CHOLESKY; 4123 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4124 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4125 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4126 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 4127 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4128 #if defined(PETSC_USE_COMPLEX) 4129 mumps->sym = 2; 4130 #else 4131 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4132 else mumps->sym = 2; 4133 #endif 4134 } 4135 4136 /* set solvertype */ 4137 PetscCall(PetscFree(B->solvertype)); 4138 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4139 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4140 if (size == 1) { 4141 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4142 B->canuseordering = PETSC_TRUE; 4143 } 4144 B->ops->destroy = MatDestroy_MUMPS; 4145 B->data = (void *)mumps; 4146 4147 *F = B; 4148 mumps->id.job = JOB_NULL; 4149 mumps->ICNTL_pre = NULL; 4150 mumps->CNTL_pre = NULL; 4151 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4152 PetscFunctionReturn(PETSC_SUCCESS); 4153 } 4154 4155 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4156 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4157 { 4158 Mat B; 4159 Mat_MUMPS *mumps; 4160 PetscBool isSeqSBAIJ; 4161 PetscMPIInt size; 4162 4163 PetscFunctionBegin; 4164 #if defined(PETSC_USE_COMPLEX) 4165 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4166 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 4167 *F = NULL; 4168 PetscFunctionReturn(PETSC_SUCCESS); 4169 } 4170 #endif 4171 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4172 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4173 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4174 PetscCall(MatSetUp(B)); 4175 4176 PetscCall(PetscNew(&mumps)); 4177 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4178 if (isSeqSBAIJ) { 4179 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4180 } else { 4181 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4182 } 4183 4184 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4185 B->ops->view = MatView_MUMPS; 4186 B->ops->getinfo = MatGetInfo_MUMPS; 4187 4188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4203 4204 B->factortype = MAT_FACTOR_CHOLESKY; 4205 #if defined(PETSC_USE_COMPLEX) 4206 mumps->sym = 2; 4207 #else 4208 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4209 else mumps->sym = 2; 4210 #endif 4211 4212 /* set solvertype */ 4213 PetscCall(PetscFree(B->solvertype)); 4214 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4215 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4216 if (size == 1) { 4217 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4218 B->canuseordering = PETSC_TRUE; 4219 } 4220 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4221 B->ops->destroy = MatDestroy_MUMPS; 4222 B->data = (void *)mumps; 4223 4224 *F = B; 4225 mumps->id.job = JOB_NULL; 4226 mumps->ICNTL_pre = NULL; 4227 mumps->CNTL_pre = NULL; 4228 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4229 PetscFunctionReturn(PETSC_SUCCESS); 4230 } 4231 4232 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType 
ftype, Mat *F) 4233 { 4234 Mat B; 4235 Mat_MUMPS *mumps; 4236 PetscBool isSeqBAIJ; 4237 PetscMPIInt size; 4238 4239 PetscFunctionBegin; 4240 /* Create the factorization matrix */ 4241 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 4242 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4243 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4244 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4245 PetscCall(MatSetUp(B)); 4246 4247 PetscCall(PetscNew(&mumps)); 4248 PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4249 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4250 B->factortype = MAT_FACTOR_LU; 4251 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4252 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4253 mumps->sym = 0; 4254 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4255 4256 B->ops->view = MatView_MUMPS; 4257 B->ops->getinfo = MatGetInfo_MUMPS; 4258 4259 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4260 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4261 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4262 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4263 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4264 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4265 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4266 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4267 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4268 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4269 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4270 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4271 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4272 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4273 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4274 4275 /* set solvertype */ 4276 PetscCall(PetscFree(B->solvertype)); 4277 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4278 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4279 if (size == 1) { 4280 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4281 B->canuseordering = PETSC_TRUE; 4282 } 4283 B->ops->destroy = MatDestroy_MUMPS; 4284 B->data = (void *)mumps; 4285 4286 *F = B; 4287 mumps->id.job = JOB_NULL; 4288 mumps->ICNTL_pre = NULL; 4289 mumps->CNTL_pre = NULL; 4290 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4291 PetscFunctionReturn(PETSC_SUCCESS); 4292 } 4293 4294 /* MatGetFactor for Seq and MPI 
SELL matrices */ 4295 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4296 { 4297 Mat B; 4298 Mat_MUMPS *mumps; 4299 PetscBool isSeqSELL; 4300 PetscMPIInt size; 4301 4302 PetscFunctionBegin; 4303 /* Create the factorization matrix */ 4304 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 4305 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4306 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4307 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4308 PetscCall(MatSetUp(B)); 4309 4310 PetscCall(PetscNew(&mumps)); 4311 4312 B->ops->view = MatView_MUMPS; 4313 B->ops->getinfo = MatGetInfo_MUMPS; 4314 4315 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4316 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4317 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4318 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4319 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4320 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4321 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4322 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4323 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4324 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4325 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4326 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4327 4328 PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4329 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4330 B->factortype = MAT_FACTOR_LU; 4331 PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4332 mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 4333 mumps->sym = 0; 4334 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4335 4336 /* set solvertype */ 4337 PetscCall(PetscFree(B->solvertype)); 4338 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4339 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4340 if (size == 1) { 4341 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4342 B->canuseordering = PETSC_TRUE; 4343 } 4344 B->ops->destroy = MatDestroy_MUMPS; 4345 B->data = (void *)mumps; 4346 4347 *F = B; 4348 mumps->id.job = JOB_NULL; 4349 mumps->ICNTL_pre = NULL; 4350 mumps->CNTL_pre = NULL; 4351 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4352 PetscFunctionReturn(PETSC_SUCCESS); 4353 } 4354 4355 /* MatGetFactor for MATNEST matrices */ 4356 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 4357 { 4358 Mat B, **mats; 4359 Mat_MUMPS *mumps; 4360 PetscInt nr, nc; 4361 PetscMPIInt size; 4362 PetscBool flg = PETSC_TRUE; 4363 4364 PetscFunctionBegin; 4365 #if 
defined(PETSC_USE_COMPLEX) 4366 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4367 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4368 *F = NULL; 4369 PetscFunctionReturn(PETSC_SUCCESS); 4370 } 4371 #endif 4372 4373 /* Return if some condition is not satisfied */ 4374 *F = NULL; 4375 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 4376 if (ftype == MAT_FACTOR_CHOLESKY) { 4377 IS *rows, *cols; 4378 PetscInt *m, *M; 4379 4380 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 4381 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 4382 PetscCall(MatNestGetISs(A, rows, cols)); 4383 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 4384 if (!flg) { 4385 PetscCall(PetscFree2(rows, cols)); 4386 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 4387 PetscFunctionReturn(PETSC_SUCCESS); 4388 } 4389 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 4390 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 4391 for (PetscInt r = 0; flg && r < nr; r++) 4392 for (PetscInt k = r + 1; flg && k < nr; k++) 4393 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 4394 PetscCall(PetscFree2(m, M)); 4395 PetscCall(PetscFree2(rows, cols)); 4396 if (!flg) { 4397 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 4398 PetscFunctionReturn(PETSC_SUCCESS); 4399 } 4400 } 4401 4402 for (PetscInt r = 0; r < nr; r++) { 4403 for (PetscInt c = 0; c < nc; c++) { 4404 Mat sub = mats[r][c]; 4405 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 4406 4407 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 4408 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 4409 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 4410 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 4411 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 4412 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 4413 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 4414 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4415 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 4416 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4417 if (ftype == MAT_FACTOR_CHOLESKY) { 4418 if (r == c) { 4419 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 4420 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4421 flg = PETSC_FALSE; 4422 } 4423 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4424 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4425 flg = PETSC_FALSE; 4426 } 4427 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4428 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not 
supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 4429 flg = PETSC_FALSE; 4430 } 4431 } 4432 } 4433 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 4434 4435 /* Create the factorization matrix */ 4436 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4437 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4438 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4439 PetscCall(MatSetUp(B)); 4440 4441 PetscCall(PetscNew(&mumps)); 4442 4443 B->ops->view = MatView_MUMPS; 4444 B->ops->getinfo = MatGetInfo_MUMPS; 4445 4446 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4447 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4448 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4449 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4450 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4451 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4452 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4453 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4454 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4455 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4456 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4457 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4458 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4459 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4460 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4461 4462 if (ftype == MAT_FACTOR_LU) { 4463 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4464 B->factortype = MAT_FACTOR_LU; 4465 mumps->sym = 0; 4466 } else { 4467 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4468 B->factortype = MAT_FACTOR_CHOLESKY; 4469 #if defined(PETSC_USE_COMPLEX) 4470 mumps->sym = 2; 4471 #else 4472 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4473 else mumps->sym = 2; 4474 #endif 4475 } 4476 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 4477 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 4478 4479 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4480 if (size == 1) { 4481 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4482 B->canuseordering = PETSC_TRUE; 4483 } 4484 4485 /* set solvertype */ 4486 PetscCall(PetscFree(B->solvertype)); 4487 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4488 B->ops->destroy = MatDestroy_MUMPS; 4489 B->data = (void *)mumps; 4490 4491 *F = B; 4492 mumps->id.job = JOB_NULL; 4493 mumps->ICNTL_pre = NULL; 4494 mumps->CNTL_pre = NULL; 4495 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4496 PetscFunctionReturn(PETSC_SUCCESS); 4497 } 4498 4499 
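/*
   A minimal usage sketch (not part of the compiled code in this file) of the workflow described in the
   MATSOLVERMUMPS manual page above: select MUMPS through a KSP/PC, set an ICNTL value programmatically
   before the factorization is performed, and query INFOG afterwards. The matrix A and vectors b, x are
   assumed to be created elsewhere; the particular ICNTL/INFOG indices are only illustrative.

     KSP      ksp;
     PC       pc;
     Mat      F;
     PetscInt infog1;

     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc));   // create the factor matrix F so ICNTL can be set before factorization
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 30));      // ICNTL(14): increase the estimated working space by 30 percent
     PetscCall(KSPSetFromOptions(ksp));
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(MatMumpsGetInfog(F, 1, &infog1));  // INFOG(1) < 0 indicates a MUMPS error
     PetscCall(KSPDestroy(&ksp));
*/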
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4500 { 4501 PetscFunctionBegin; 4502 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4503 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4504 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4505 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4506 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4507 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4508 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4509 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4510 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4511 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4512 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4513 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4514 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4515 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4516 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4517 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4518 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4519 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 4520 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 4521 PetscFunctionReturn(PETSC_SUCCESS); 4522 } 4523
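/*
   A sketch of the block description expected by MatMumpsSetBlk(), under the assumption that F is a
   MATSOLVERMUMPS factor obtained as in the sketch above and that -mat_mumps_icntl_15 1 will be used:
   three variable blocks of sizes 2, 2 and 1 for a 5x5 matrix, given as a 1-based prefix-sum array.
   blkvar is left NULL here, which is intended to mean the variables of each block are taken in their
   natural order (BLKVAR is optional in MUMPS; consult the MUMPS documentation for the exact semantics).

     PetscInt blkptr[] = {1, 3, 5, 6};   // block 1 = variables 1-2, block 2 = variables 3-4, block 3 = variable 5

     PetscCall(MatMumpsSetBlk(F, 3, NULL, blkptr));
*/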