1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 #include <petsc/private/vecimpl.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15 #include <cmumps_c.h> 16 #include <zmumps_c.h> 17 #include <smumps_c.h> 18 #include <dmumps_c.h> 19 #else 20 #if defined(PETSC_USE_COMPLEX) 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <cmumps_c.h> 23 #define MUMPS_c cmumps_c 24 #define MumpsScalar CMUMPS_COMPLEX 25 #else 26 #include <zmumps_c.h> 27 #define MUMPS_c zmumps_c 28 #define MumpsScalar ZMUMPS_COMPLEX 29 #endif 30 #else 31 #if defined(PETSC_USE_REAL_SINGLE) 32 #include <smumps_c.h> 33 #define MUMPS_c smumps_c 34 #define MumpsScalar SMUMPS_REAL 35 #else 36 #include <dmumps_c.h> 37 #define MUMPS_c dmumps_c 38 #define MumpsScalar DMUMPS_REAL 39 #endif 40 #endif 41 #endif 42 #if defined(PETSC_USE_COMPLEX) 43 #if defined(PETSC_USE_REAL_SINGLE) 44 #define MUMPS_STRUC_C CMUMPS_STRUC_C 45 #else 46 #define MUMPS_STRUC_C ZMUMPS_STRUC_C 47 #endif 48 #else 49 #if defined(PETSC_USE_REAL_SINGLE) 50 #define MUMPS_STRUC_C SMUMPS_STRUC_C 51 #else 52 #define MUMPS_STRUC_C DMUMPS_STRUC_C 53 #endif 54 #endif 55 EXTERN_C_END 56 57 #define JOB_INIT -1 58 #define JOB_NULL 0 59 #define JOB_FACTSYMBOLIC 1 60 #define JOB_FACTNUMERIC 2 61 #define JOB_SOLVE 3 62 #define JOB_END -2 63 64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 65 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 66 naming convention in PetscMPIInt, PetscBLASInt etc. 67 */ 68 typedef MUMPS_INT PetscMUMPSInt; 69 70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 71 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 72 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 73 #endif 74 #else 75 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 76 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 77 #endif 78 #endif 79 80 #define MPIU_MUMPSINT MPI_INT 81 #define PETSC_MUMPS_INT_MAX 2147483647 82 #define PETSC_MUMPS_INT_MIN -2147483648 83 84 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 85 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 86 { 87 PetscFunctionBegin; 88 #if PetscDefined(USE_64BIT_INDICES) 89 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 90 #endif 91 *b = (PetscMUMPSInt)a; 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 /* Put these utility routines here since they are only used in this file */ 96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 97 { 98 PetscInt myval; 99 PetscBool myset; 100 101 PetscFunctionBegin; 102 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 103 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 104 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 105 if (set) *set = myset; 106 PetscFunctionReturn(PETSC_SUCCESS); 107 } 108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 109 110 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C. 111 // 112 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type. 113 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size 114 // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall(). 115 // 116 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface. 117 // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used. 118 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers. 119 typedef struct { 120 PetscPrecision precision; // precision used by MUMPS 121 void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex 122 123 // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions. 124 MUMPS_INT sym, par, job; 125 MUMPS_INT comm_fortran; /* Fortran communicator */ 126 MUMPS_INT *icntl; 127 void *cntl; // MumpsReal, fixed size array 128 MUMPS_INT n; 129 MUMPS_INT nblk; 130 131 /* Assembled entry */ 132 MUMPS_INT8 nnz; 133 MUMPS_INT *irn; 134 MUMPS_INT *jcn; 135 void *a; // MumpsScalar, centralized input 136 PetscCount a_len; 137 138 /* Distributed entry */ 139 MUMPS_INT8 nnz_loc; 140 MUMPS_INT *irn_loc; 141 MUMPS_INT *jcn_loc; 142 void *a_loc; // MumpsScalar, distributed input 143 PetscCount a_loc_len; 144 145 /* Matrix by blocks */ 146 MUMPS_INT *blkptr; 147 MUMPS_INT *blkvar; 148 149 /* Ordering, if given by user */ 150 MUMPS_INT *perm_in; 151 152 /* RHS, solution, ouptput data and statistics */ 153 void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers 154 PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar 155 156 MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc; 157 MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc; 158 // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0) 159 MUMPS_INT schur_lld; 160 MUMPS_INT *info, *infog; // fixed size array 161 void *rinfo, *rinfog; // MumpsReal, fixed size array 162 163 /* Null space */ 164 MUMPS_INT *pivnul_list; // allocated by MUMPS! 165 MUMPS_INT *mapping; // allocated by MUMPS! 166 167 /* Schur */ 168 MUMPS_INT size_schur; 169 MUMPS_INT *listvar_schur; 170 void *schur; // MumpsScalar 171 PetscCount schur_len; 172 173 /* For out-of-core */ 174 char *ooc_tmpdir; // fixed size array 175 char *ooc_prefix; // fixed size array 176 } XMUMPS_STRUC_C; 177 178 // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct 179 #define AllocateInternalID(MUMPS_STRUC_T, outer) \ 180 do { \ 181 MUMPS_STRUC_T *inner; \ 182 PetscCall(PetscNew(&inner)); \ 183 outer->icntl = inner->icntl; \ 184 outer->cntl = inner->cntl; \ 185 outer->info = inner->info; \ 186 outer->infog = inner->infog; \ 187 outer->rinfo = inner->rinfo; \ 188 outer->rinfog = inner->rinfog; \ 189 outer->ooc_tmpdir = inner->ooc_tmpdir; \ 190 outer->ooc_prefix = inner->ooc_prefix; \ 191 /* the three field should never change after init */ \ 192 inner->comm_fortran = outer->comm_fortran; \ 193 inner->par = outer->par; \ 194 inner->sym = outer->sym; \ 195 outer->internal_id = inner; \ 196 } while (0) 197 198 // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner 199 static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision) 200 { 201 PetscFunctionBegin; 202 outer->precision = precision; 203 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 204 #if defined(PETSC_USE_COMPLEX) 205 if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer); 206 else AllocateInternalID(ZMUMPS_STRUC_C, outer); 207 #else 208 if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer); 209 else AllocateInternalID(DMUMPS_STRUC_C, outer); 210 #endif 211 #else 212 AllocateInternalID(MUMPS_STRUC_C, outer); 213 #endif 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \ 218 do { \ 219 MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \ 220 PetscCall(PetscFree(inner->a)); \ 221 PetscCall(PetscFree(inner->a_loc)); \ 222 PetscCall(PetscFree(inner->redrhs)); \ 223 PetscCall(PetscFree(inner->rhs)); \ 224 PetscCall(PetscFree(inner->rhs_sparse)); \ 225 PetscCall(PetscFree(inner->rhs_loc)); \ 226 PetscCall(PetscFree(inner->sol_loc)); \ 227 PetscCall(PetscFree(inner->schur)); \ 228 } while (0) 229 230 static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer) 231 { 232 PetscFunctionBegin; 233 if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer 234 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 235 const PetscPrecision mumps_precision = outer->precision; 236 if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision 237 #if defined(PETSC_USE_COMPLEX) 238 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer); 239 else FreeInternalIDFields(ZMUMPS_STRUC_C, outer); 240 #else 241 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer); 242 else FreeInternalIDFields(DMUMPS_STRUC_C, outer); 243 #endif 244 } 245 #endif 246 PetscCall(PetscFree(outer->internal_id)); 247 } 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>. 252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array. 253 // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>) 254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request. 255 // 256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same. 257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 259 // 260 // 261 // Input parameters: 262 // + convert - whether to do PetscScalar to MumpsScalar conversion 263 // . n - length of the PetscScalar array 264 // . pa - [n]], points to the PetscScalar array 265 // . precision - precision of MumpsScalar 266 // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 267 // - ma - on input, an existing MumpsScalar array if any. 268 // 269 // Output parameters: 270 // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 272 // 273 // Note: 274 // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller. 275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 276 { 277 PetscFunctionBegin; 278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 279 const PetscPrecision mumps_precision = precision; 280 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision); 281 #if defined(PETSC_USE_COMPLEX) 282 if (mumps_precision != PETSC_SCALAR_PRECISION) { 283 if (mumps_precision == PETSC_PRECISION_SINGLE) { 284 if (*m < n) { 285 PetscCall(PetscFree(*ma)); 286 PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 287 *m = n; 288 } 289 if (convert) { 290 CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 291 for (PetscCount i = 0; i < n; i++) { 292 b[i].r = PetscRealPart(pa[i]); 293 b[i].i = PetscImaginaryPart(pa[i]); 294 } 295 } 296 } else { 297 if (*m < n) { 298 PetscCall(PetscFree(*ma)); 299 PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 300 *m = n; 301 } 302 if (convert) { 303 ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 304 for (PetscCount i = 0; i < n; i++) { 305 b[i].r = PetscRealPart(pa[i]); 306 b[i].i = PetscImaginaryPart(pa[i]); 307 } 308 } 309 } 310 } 311 #else 312 if (mumps_precision != PETSC_SCALAR_PRECISION) { 313 if (mumps_precision == PETSC_PRECISION_SINGLE) { 314 if (*m < n) { 315 PetscCall(PetscFree(*ma)); 316 PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 317 *m = n; 318 } 319 if (convert) { 320 SMUMPS_REAL *b = *(SMUMPS_REAL **)ma; 321 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 322 } 323 } else { 324 if (*m < n) { 325 PetscCall(PetscFree(*ma)); 326 PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 327 *m = n; 328 } 329 if (convert) { 330 DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 331 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 332 } 333 } 334 } 335 #endif 336 else 337 #endif 338 { 339 if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 340 *ma = (void *)pa; // same precision, make them alias 341 *m = 0; 342 } 343 PetscFunctionReturn(PETSC_SUCCESS); 344 } 345 346 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 347 // 348 // 1) If the two types are different, cast array elements. 349 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 350 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 351 { 352 PetscFunctionBegin; 353 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 354 if (mumps_precision != PETSC_SCALAR_PRECISION) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (mumps_precision == PETSC_PRECISION_SINGLE) { 357 PetscReal *a = (PetscReal *)pa; 358 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 360 } else { 361 PetscReal *a = (PetscReal *)pa; 362 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 363 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 364 } 365 #else 366 if (mumps_precision == PETSC_PRECISION_SINGLE) { 367 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 368 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 369 } else { 370 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 371 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 372 } 373 #endif 374 } else 375 #endif 376 PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 381 // 382 // 1) If the two types are different, cast array elements. 383 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 384 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma) 385 { 386 PetscFunctionBegin; 387 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 388 if (mumps_precision != PETSC_SCALAR_PRECISION) { 389 #if defined(PETSC_USE_COMPLEX) 390 if (mumps_precision == PETSC_PRECISION_SINGLE) { 391 CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma; 392 for (PetscCount i = 0; i < n; i++) { 393 b[i].r = PetscRealPart(pa[i]); 394 b[i].i = PetscImaginaryPart(pa[i]); 395 } 396 } else { 397 ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma; 398 for (PetscCount i = 0; i < n; i++) { 399 b[i].r = PetscRealPart(pa[i]); 400 b[i].i = PetscImaginaryPart(pa[i]); 401 } 402 } 403 #else 404 if (mumps_precision == PETSC_PRECISION_SINGLE) { 405 SMUMPS_REAL *b = (SMUMPS_REAL *)ma; 406 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 407 } else { 408 DMUMPS_REAL *b = (DMUMPS_REAL *)ma; 409 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 410 } 411 #endif 412 } else 413 #endif 414 PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n)); 415 PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id) 419 { 420 return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT; 421 } 422 423 #define PreMumpsCall(inner, outer, mumpsscalar) \ 424 do { \ 425 inner->job = outer->job; \ 426 inner->n = outer->n; \ 427 inner->nblk = outer->nblk; \ 428 inner->nnz = outer->nnz; \ 429 inner->irn = outer->irn; \ 430 inner->jcn = outer->jcn; \ 431 inner->a = (mumpsscalar *)outer->a; \ 432 inner->nnz_loc = outer->nnz_loc; \ 433 inner->irn_loc = outer->irn_loc; \ 434 inner->jcn_loc = outer->jcn_loc; \ 435 inner->a_loc = (mumpsscalar *)outer->a_loc; \ 436 inner->blkptr = outer->blkptr; \ 437 inner->blkvar = outer->blkvar; \ 438 inner->perm_in = outer->perm_in; \ 439 inner->rhs = (mumpsscalar *)outer->rhs; \ 440 inner->redrhs = (mumpsscalar *)outer->redrhs; \ 441 inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \ 442 inner->sol_loc = (mumpsscalar *)outer->sol_loc; \ 443 inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \ 444 inner->irhs_sparse = outer->irhs_sparse; \ 445 inner->irhs_ptr = outer->irhs_ptr; \ 446 inner->isol_loc = outer->isol_loc; \ 447 inner->irhs_loc = outer->irhs_loc; \ 448 inner->nrhs = outer->nrhs; \ 449 inner->lrhs = outer->lrhs; \ 450 inner->lredrhs = outer->lredrhs; \ 451 inner->nz_rhs = outer->nz_rhs; \ 452 inner->lsol_loc = outer->lsol_loc; \ 453 inner->nloc_rhs = outer->nloc_rhs; \ 454 inner->lrhs_loc = outer->lrhs_loc; \ 455 inner->schur_lld = outer->schur_lld; \ 456 inner->size_schur = outer->size_schur; \ 457 inner->listvar_schur = outer->listvar_schur; \ 458 inner->schur = (mumpsscalar *)outer->schur; \ 459 } while (0) 460 461 #define PostMumpsCall(inner, outer) \ 462 do { \ 463 outer->pivnul_list = inner->pivnul_list; \ 464 outer->mapping = inner->mapping; \ 465 } while (0) 466 467 // Entry for PETSc to call mumps 468 static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer) 469 { 470 PetscFunctionBegin; 471 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 472 #if defined(PETSC_USE_COMPLEX) 473 if (outer->precision == PETSC_PRECISION_SINGLE) { 474 CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id; 475 PreMumpsCall(inner, outer, CMUMPS_COMPLEX); 476 PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner)); 477 PostMumpsCall(inner, outer); 478 } else { 479 ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id; 480 PreMumpsCall(inner, outer, ZMUMPS_COMPLEX); 481 PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner)); 482 PostMumpsCall(inner, outer); 483 } 484 #else 485 if (outer->precision == PETSC_PRECISION_SINGLE) { 486 SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id; 487 PreMumpsCall(inner, outer, SMUMPS_REAL); 488 PetscStackCallExternalVoid("smumps_c", smumps_c(inner)); 489 PostMumpsCall(inner, outer); 490 } else { 491 DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id; 492 PreMumpsCall(inner, outer, DMUMPS_REAL); 493 PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner)); 494 PostMumpsCall(inner, outer); 495 } 496 #endif 497 #else 498 MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id; 499 PreMumpsCall(inner, outer, MumpsScalar); 500 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner)); 501 PostMumpsCall(inner, outer); 502 #endif 503 PetscFunctionReturn(PETSC_SUCCESS); 504 } 505 506 /* macros s.t. indices match MUMPS documentation */ 507 #define ICNTL(I) icntl[(I) - 1] 508 #define INFOG(I) infog[(I) - 1] 509 #define INFO(I) info[(I) - 1] 510 511 // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I! 512 #if defined(PETSC_USE_COMPLEX) 513 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i) 514 #else 515 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I]) 516 #endif 517 518 // Get a value from MumpsReal arrays. The value is convertible to PetscReal. 519 #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1]) 520 #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1]) 521 #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1]) 522 523 // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL> 524 #define ID_CNTL_SET(ID, I, VAL) \ 525 do { \ 526 if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \ 527 else ((double *)(ID).cntl)[(I) - 1] = (VAL); \ 528 } while (0) 529 530 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 531 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 532 #define PetscMUMPS_c(mumps) \ 533 do { \ 534 if (mumps->use_petsc_omp_support) { \ 535 if (mumps->is_omp_master) { \ 536 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 537 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 538 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 539 PetscCall(PetscFPTrapPop()); \ 540 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 541 } \ 542 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 543 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 544 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 545 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 546 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 547 */ \ 548 MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \ 549 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 550 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 551 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 552 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 553 } else { \ 554 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 555 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 556 PetscCall(PetscFPTrapPop()); \ 557 } \ 558 } while (0) 559 #else 560 #define PetscMUMPS_c(mumps) \ 561 do { \ 562 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 563 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 564 PetscCall(PetscFPTrapPop()); \ 565 } while (0) 566 #endif 567 568 typedef struct Mat_MUMPS Mat_MUMPS; 569 struct Mat_MUMPS { 570 XMUMPS_STRUC_C id; 571 572 MatStructure matstruc; 573 PetscMPIInt myid, petsc_size; 574 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 575 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 576 PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 577 PetscMUMPSInt sym; 578 MPI_Comm mumps_comm; 579 PetscMUMPSInt *ICNTL_pre; 580 PetscReal *CNTL_pre; 581 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 582 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 583 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 584 PetscMUMPSInt ICNTL26; 585 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 586 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 587 PetscInt *rhs_nrow, max_nrhs; 588 PetscMPIInt *rhs_recvcounts, *rhs_disps; 589 PetscScalar *rhs_loc, *rhs_recvbuf; 590 #endif 591 Vec b_seq, x_seq; 592 PetscInt ninfo, *info; /* which INFO to display */ 593 PetscInt sizeredrhs; 594 PetscScalar *schur_sol; 595 PetscInt schur_sizesol; 596 PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision 597 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 598 PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 599 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 600 601 /* Support for MATNEST */ 602 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 603 PetscCount *nest_vals_start; 604 PetscScalar *nest_vals; 605 606 /* stuff used by petsc/mumps OpenMP support*/ 607 PetscBool use_petsc_omp_support; 608 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 609 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */ 610 PetscCount *recvcount; /* a collection of nnz on omp_master */ 611 PetscMPIInt tag, omp_comm_size; 612 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 613 MPI_Request *reqs; 614 }; 615 616 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 617 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 618 */ 619 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 620 { 621 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 622 623 PetscFunctionBegin; 624 #if defined(PETSC_USE_64BIT_INDICES) 625 { 626 PetscInt i; 627 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 628 PetscCall(PetscFree(mumps->ia_alloc)); 629 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 630 mumps->cur_ilen = nrow + 1; 631 } 632 if (nnz > mumps->cur_jlen) { 633 PetscCall(PetscFree(mumps->ja_alloc)); 634 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 635 mumps->cur_jlen = nnz; 636 } 637 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 638 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 639 *ia_mumps = mumps->ia_alloc; 640 *ja_mumps = mumps->ja_alloc; 641 } 642 #else 643 *ia_mumps = ia; 644 *ja_mumps = ja; 645 #endif 646 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 651 { 652 PetscFunctionBegin; 653 PetscCall(PetscFree(mumps->id.listvar_schur)); 654 PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID() 655 PetscCall(PetscFree(mumps->schur_sol)); 656 mumps->id.size_schur = 0; 657 mumps->id.schur_lld = 0; 658 if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 /* solve with rhs in mumps->id.redrhs and return in the same location */ 663 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 664 { 665 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 666 Mat S, B, X; // solve S*X = B; all three matrices are dense 667 MatFactorSchurStatus schurstatus; 668 PetscInt sizesol; 669 const PetscScalar *xarray; 670 671 PetscFunctionBegin; 672 PetscCall(MatFactorFactorizeSchurComplement(F)); 673 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 674 PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); 675 676 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B)); 677 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 678 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 679 PetscCall(MatBindToCPU(B, S->boundtocpu)); 680 #endif 681 switch (schurstatus) { 682 case MAT_FACTOR_SCHUR_FACTORED: 683 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X)); 684 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 685 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 686 PetscCall(MatBindToCPU(X, S->boundtocpu)); 687 #endif 688 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 689 PetscCall(MatMatSolveTranspose(S, B, X)); 690 } else { 691 PetscCall(MatMatSolve(S, B, X)); 692 } 693 break; 694 case MAT_FACTOR_SCHUR_INVERTED: 695 sizesol = mumps->id.nrhs * mumps->id.size_schur; 696 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 697 PetscCall(PetscFree(mumps->schur_sol)); 698 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 699 mumps->schur_sizesol = sizesol; 700 } 701 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 702 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 703 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 704 PetscCall(MatBindToCPU(X, S->boundtocpu)); 705 #endif 706 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 707 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 708 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 709 } else { 710 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 711 } 712 PetscCall(MatProductSetFromOptions(X)); 713 PetscCall(MatProductSymbolic(X)); 714 PetscCall(MatProductNumeric(X)); 715 716 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 717 break; 718 default: 719 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 720 } 721 // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device. 722 PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs. 723 PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs)); 724 PetscCall(MatDenseRestoreArrayRead(X, &xarray)); 725 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 726 PetscCall(MatDestroy(&B)); 727 PetscCall(MatDestroy(&X)); 728 PetscFunctionReturn(PETSC_SUCCESS); 729 } 730 731 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 732 { 733 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 734 735 PetscFunctionBegin; 736 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 if (!expansion) { /* prepare for the condensation step */ 740 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 741 /* allocate MUMPS internal array to store reduced right-hand sides */ 742 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 743 mumps->id.lredrhs = mumps->id.size_schur; 744 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 745 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 746 PetscCall(PetscFree(mumps->redrhs)); 747 PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs)); 748 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs)); 749 } 750 } else { /* prepare for the expansion step */ 751 PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */ 752 mumps->id.ICNTL(26) = 2; /* expansion phase */ 753 PetscMUMPS_c(mumps); 754 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 755 /* restore defaults */ 756 mumps->id.ICNTL(26) = -1; 757 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 758 if (mumps->id.nrhs > 1) { 759 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 760 PetscCall(PetscFree(mumps->redrhs)); 761 mumps->id.redrhs_len = 0; 762 mumps->id.lredrhs = 0; 763 mumps->sizeredrhs = 0; 764 } 765 } 766 PetscFunctionReturn(PETSC_SUCCESS); 767 } 768 769 /* 770 MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz] 771 772 input: 773 A - matrix in aij,baij or sbaij format 774 shift - 0: C style output triple; 1: Fortran style output triple. 775 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 776 MAT_REUSE_MATRIX: only the values in v array are updated 777 output: 778 nnz - dim of r, c, and v (number of local nonzero entries of A) 779 r, c, v - row and col index, matrix values (matrix triples) 780 781 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 782 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 783 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 784 785 */ 786 787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 788 { 789 const PetscScalar *av; 790 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 791 PetscCount nz, rnz, k; 792 PetscMUMPSInt *row, *col; 793 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 794 795 PetscFunctionBegin; 796 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 797 if (reuse == MAT_INITIAL_MATRIX) { 798 nz = aa->nz; 799 ai = aa->i; 800 aj = aa->j; 801 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 802 for (PetscCount i = k = 0; i < M; i++) { 803 rnz = ai[i + 1] - ai[i]; 804 ajj = aj + ai[i]; 805 for (PetscCount j = 0; j < rnz; j++) { 806 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 807 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 808 k++; 809 } 810 } 811 mumps->val = (PetscScalar *)av; 812 mumps->irn = row; 813 mumps->jcn = col; 814 mumps->nnz = nz; 815 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 816 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 817 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 822 { 823 PetscCount nz, i, j, k, r; 824 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 825 PetscMUMPSInt *row, *col; 826 827 PetscFunctionBegin; 828 nz = a->sliidx[a->totalslices]; 829 if (reuse == MAT_INITIAL_MATRIX) { 830 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 831 for (i = k = 0; i < a->totalslices; i++) { 832 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 833 } 834 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 835 mumps->irn = row; 836 mumps->jcn = col; 837 mumps->nnz = nz; 838 mumps->val = a->val; 839 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 840 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 845 { 846 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 847 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 848 PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 849 PetscInt bs; 850 PetscMUMPSInt *row, *col; 851 852 PetscFunctionBegin; 853 if (reuse == MAT_INITIAL_MATRIX) { 854 PetscCall(MatGetBlockSize(A, &bs)); 855 M = A->rmap->N / bs; 856 ai = aa->i; 857 aj = aa->j; 858 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 859 for (i = 0; i < M; i++) { 860 ajj = aj + ai[i]; 861 rnz = ai[i + 1] - ai[i]; 862 for (k = 0; k < rnz; k++) { 863 for (j = 0; j < bs; j++) { 864 for (m = 0; m < bs; m++) { 865 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 866 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 867 idx++; 868 } 869 } 870 } 871 } 872 mumps->irn = row; 873 mumps->jcn = col; 874 mumps->nnz = nz; 875 mumps->val = aa->a; 876 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 877 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 882 { 883 const PetscInt *ai, *aj, *ajj; 884 PetscInt bs; 885 PetscCount nz, rnz, i, j, k, m; 886 PetscMUMPSInt *row, *col; 887 PetscScalar *val; 888 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 889 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 890 #if defined(PETSC_USE_COMPLEX) 891 PetscBool isset, hermitian; 892 #endif 893 894 PetscFunctionBegin; 895 #if defined(PETSC_USE_COMPLEX) 896 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 897 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 898 #endif 899 ai = aa->i; 900 aj = aa->j; 901 PetscCall(MatGetBlockSize(A, &bs)); 902 if (reuse == MAT_INITIAL_MATRIX) { 903 const PetscCount alloc_size = aa->nz * bs2; 904 905 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 906 if (bs > 1) { 907 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 908 mumps->val = mumps->val_alloc; 909 } else { 910 mumps->val = aa->a; 911 } 912 mumps->irn = row; 913 mumps->jcn = col; 914 } else { 915 row = mumps->irn; 916 col = mumps->jcn; 917 } 918 val = mumps->val; 919 920 nz = 0; 921 if (bs > 1) { 922 for (i = 0; i < mbs; i++) { 923 rnz = ai[i + 1] - ai[i]; 924 ajj = aj + ai[i]; 925 for (j = 0; j < rnz; j++) { 926 for (k = 0; k < bs; k++) { 927 for (m = 0; m < bs; m++) { 928 if (ajj[j] > i || k >= m) { 929 if (reuse == MAT_INITIAL_MATRIX) { 930 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 931 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 932 } 933 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 934 } 935 } 936 } 937 } 938 } 939 } else if (reuse == MAT_INITIAL_MATRIX) { 940 for (i = 0; i < mbs; i++) { 941 rnz = ai[i + 1] - ai[i]; 942 ajj = aj + ai[i]; 943 for (j = 0; j < rnz; j++) { 944 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 945 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 946 nz++; 947 } 948 } 949 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 950 } else if (mumps->nest_vals) 951 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 952 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 953 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 958 { 959 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 960 PetscCount nz, rnz, i, j; 961 const PetscScalar *av, *v1; 962 PetscScalar *val; 963 PetscMUMPSInt *row, *col; 964 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 965 PetscBool diagDense; 966 #if defined(PETSC_USE_COMPLEX) 967 PetscBool hermitian, isset; 968 #endif 969 970 PetscFunctionBegin; 971 #if defined(PETSC_USE_COMPLEX) 972 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 973 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 974 #endif 975 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 976 ai = aa->i; 977 aj = aa->j; 978 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 979 if (reuse == MAT_INITIAL_MATRIX) { 980 /* count nz in the upper triangular part of A */ 981 nz = 0; 982 if (!diagDense) { 983 for (i = 0; i < M; i++) { 984 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 985 for (j = ai[i]; j < ai[i + 1]; j++) { 986 if (aj[j] < i) continue; 987 nz++; 988 } 989 } else { 990 nz += ai[i + 1] - adiag[i]; 991 } 992 } 993 } else { 994 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 995 } 996 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 997 PetscCall(PetscMalloc1(nz, &val)); 998 mumps->nnz = nz; 999 mumps->irn = row; 1000 mumps->jcn = col; 1001 mumps->val = mumps->val_alloc = val; 1002 1003 nz = 0; 1004 if (!diagDense) { 1005 for (i = 0; i < M; i++) { 1006 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1007 for (j = ai[i]; j < ai[i + 1]; j++) { 1008 if (aj[j] < i) continue; 1009 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1010 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 1011 val[nz] = av[j]; 1012 nz++; 1013 } 1014 } else { 1015 rnz = ai[i + 1] - adiag[i]; 1016 ajj = aj + adiag[i]; 1017 v1 = av + adiag[i]; 1018 for (j = 0; j < rnz; j++) { 1019 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1020 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1021 val[nz++] = v1[j]; 1022 } 1023 } 1024 } 1025 } else { 1026 for (i = 0; i < M; i++) { 1027 rnz = ai[i + 1] - adiag[i]; 1028 ajj = aj + adiag[i]; 1029 v1 = av + adiag[i]; 1030 for (j = 0; j < rnz; j++) { 1031 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1032 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1033 val[nz++] = v1[j]; 1034 } 1035 } 1036 } 1037 } else { 1038 nz = 0; 1039 val = mumps->val; 1040 if (!diagDense) { 1041 for (i = 0; i < M; i++) { 1042 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1043 for (j = ai[i]; j < ai[i + 1]; j++) { 1044 if (aj[j] < i) continue; 1045 val[nz++] = av[j]; 1046 } 1047 } else { 1048 rnz = ai[i + 1] - adiag[i]; 1049 v1 = av + adiag[i]; 1050 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1051 } 1052 } 1053 } else { 1054 for (i = 0; i < M; i++) { 1055 rnz = ai[i + 1] - adiag[i]; 1056 v1 = av + adiag[i]; 1057 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1058 } 1059 } 1060 } 1061 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 1062 PetscFunctionReturn(PETSC_SUCCESS); 1063 } 1064 1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1066 { 1067 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 1068 PetscInt bs; 1069 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1070 PetscMUMPSInt *row, *col; 1071 const PetscScalar *av, *bv, *v1, *v2; 1072 PetscScalar *val; 1073 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1074 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1075 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1076 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 1077 #if defined(PETSC_USE_COMPLEX) 1078 PetscBool hermitian, isset; 1079 #endif 1080 1081 PetscFunctionBegin; 1082 #if defined(PETSC_USE_COMPLEX) 1083 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1084 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1085 #endif 1086 PetscCall(MatGetBlockSize(A, &bs)); 1087 rstart = A->rmap->rstart; 1088 ai = aa->i; 1089 aj = aa->j; 1090 bi = bb->i; 1091 bj = bb->j; 1092 av = aa->a; 1093 bv = bb->a; 1094 1095 garray = mat->garray; 1096 1097 if (reuse == MAT_INITIAL_MATRIX) { 1098 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 1099 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1100 PetscCall(PetscMalloc1(nz, &val)); 1101 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1102 mumps->irn = row; 1103 mumps->jcn = col; 1104 mumps->val = mumps->val_alloc = val; 1105 } else { 1106 val = mumps->val; 1107 } 1108 1109 jj = 0; 1110 irow = rstart; 1111 for (i = 0; i < mbs; i++) { 1112 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1113 countA = ai[i + 1] - ai[i]; 1114 countB = bi[i + 1] - bi[i]; 1115 bjj = bj + bi[i]; 1116 v1 = av + ai[i] * bs2; 1117 v2 = bv + bi[i] * bs2; 1118 1119 if (bs > 1) { 1120 /* A-part */ 1121 for (j = 0; j < countA; j++) { 1122 for (k = 0; k < bs; k++) { 1123 for (m = 0; m < bs; m++) { 1124 if (rstart + ajj[j] * bs > irow || k >= m) { 1125 if (reuse == MAT_INITIAL_MATRIX) { 1126 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1127 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 1128 } 1129 val[jj++] = v1[j * bs2 + m + k * bs]; 1130 } 1131 } 1132 } 1133 } 1134 1135 /* B-part */ 1136 for (j = 0; j < countB; j++) { 1137 for (k = 0; k < bs; k++) { 1138 for (m = 0; m < bs; m++) { 1139 if (reuse == MAT_INITIAL_MATRIX) { 1140 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1141 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 1142 } 1143 val[jj++] = v2[j * bs2 + m + k * bs]; 1144 } 1145 } 1146 } 1147 } else { 1148 /* A-part */ 1149 for (j = 0; j < countA; j++) { 1150 if (reuse == MAT_INITIAL_MATRIX) { 1151 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1152 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1153 } 1154 val[jj++] = v1[j]; 1155 } 1156 1157 /* B-part */ 1158 for (j = 0; j < countB; j++) { 1159 if (reuse == MAT_INITIAL_MATRIX) { 1160 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1161 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1162 } 1163 val[jj++] = v2[j]; 1164 } 1165 } 1166 irow += bs; 1167 } 1168 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1173 { 1174 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1175 PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 1176 PetscMUMPSInt *row, *col; 1177 const PetscScalar *av, *bv, *v1, *v2; 1178 PetscScalar *val; 1179 Mat Ad, Ao; 1180 Mat_SeqAIJ *aa; 1181 Mat_SeqAIJ *bb; 1182 1183 PetscFunctionBegin; 1184 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1185 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1186 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1187 1188 aa = (Mat_SeqAIJ *)Ad->data; 1189 bb = (Mat_SeqAIJ *)Ao->data; 1190 ai = aa->i; 1191 aj = aa->j; 1192 bi = bb->i; 1193 bj = bb->j; 1194 1195 rstart = A->rmap->rstart; 1196 cstart = A->cmap->rstart; 1197 1198 if (reuse == MAT_INITIAL_MATRIX) { 1199 nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 1200 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1201 PetscCall(PetscMalloc1(nz, &val)); 1202 mumps->nnz = nz; 1203 mumps->irn = row; 1204 mumps->jcn = col; 1205 mumps->val = mumps->val_alloc = val; 1206 } else { 1207 val = mumps->val; 1208 } 1209 1210 jj = 0; 1211 irow = rstart; 1212 for (i = 0; i < m; i++) { 1213 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1214 countA = ai[i + 1] - ai[i]; 1215 countB = bi[i + 1] - bi[i]; 1216 bjj = bj + bi[i]; 1217 v1 = av + ai[i]; 1218 v2 = bv + bi[i]; 1219 1220 /* A-part */ 1221 for (j = 0; j < countA; j++) { 1222 if (reuse == MAT_INITIAL_MATRIX) { 1223 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1224 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 1225 } 1226 val[jj++] = v1[j]; 1227 } 1228 1229 /* B-part */ 1230 for (j = 0; j < countB; j++) { 1231 if (reuse == MAT_INITIAL_MATRIX) { 1232 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1233 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1234 } 1235 val[jj++] = v2[j]; 1236 } 1237 irow++; 1238 } 1239 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1240 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1245 { 1246 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 1247 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 1248 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1249 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 1250 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 1251 const PetscInt bs2 = mat->bs2; 1252 PetscInt bs; 1253 PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx; 1254 PetscMUMPSInt *row, *col; 1255 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 1256 PetscScalar *val; 1257 1258 PetscFunctionBegin; 1259 PetscCall(MatGetBlockSize(A, &bs)); 1260 if (reuse == MAT_INITIAL_MATRIX) { 1261 nz = bs2 * (aa->nz + bb->nz); 1262 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1263 PetscCall(PetscMalloc1(nz, &val)); 1264 mumps->nnz = nz; 1265 mumps->irn = row; 1266 mumps->jcn = col; 1267 mumps->val = mumps->val_alloc = val; 1268 } else { 1269 val = mumps->val; 1270 } 1271 1272 jj = 0; 1273 irow = rstart; 1274 for (i = 0; i < mbs; i++) { 1275 countA = ai[i + 1] - ai[i]; 1276 countB = bi[i + 1] - bi[i]; 1277 ajj = aj + ai[i]; 1278 bjj = bj + bi[i]; 1279 v1 = av + bs2 * ai[i]; 1280 v2 = bv + bs2 * bi[i]; 1281 1282 idx = 0; 1283 /* A-part */ 1284 for (k = 0; k < countA; k++) { 1285 for (j = 0; j < bs; j++) { 1286 for (n = 0; n < bs; n++) { 1287 if (reuse == MAT_INITIAL_MATRIX) { 1288 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1289 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 1290 } 1291 val[jj++] = v1[idx++]; 1292 } 1293 } 1294 } 1295 1296 idx = 0; 1297 /* B-part */ 1298 for (k = 0; k < countB; k++) { 1299 for (j = 0; j < bs; j++) { 1300 for (n = 0; n < bs; n++) { 1301 if (reuse == MAT_INITIAL_MATRIX) { 1302 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1303 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 1304 } 1305 val[jj++] = v2[idx++]; 1306 } 1307 } 1308 } 1309 irow += bs; 1310 } 1311 PetscFunctionReturn(PETSC_SUCCESS); 1312 } 1313 1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1315 { 1316 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1317 PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 1318 PetscMUMPSInt *row, *col; 1319 const PetscScalar *av, *bv, *v1, *v2; 1320 PetscScalar *val; 1321 Mat Ad, Ao; 1322 Mat_SeqAIJ *aa; 1323 Mat_SeqAIJ *bb; 1324 #if defined(PETSC_USE_COMPLEX) 1325 PetscBool hermitian, isset; 1326 #endif 1327 1328 PetscFunctionBegin; 1329 #if defined(PETSC_USE_COMPLEX) 1330 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1331 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1332 #endif 1333 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1334 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1335 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1336 1337 aa = (Mat_SeqAIJ *)Ad->data; 1338 bb = (Mat_SeqAIJ *)Ao->data; 1339 ai = aa->i; 1340 aj = aa->j; 1341 bi = bb->i; 1342 bj = bb->j; 1343 PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL)); 1344 rstart = A->rmap->rstart; 1345 1346 if (reuse == MAT_INITIAL_MATRIX) { 1347 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1348 nzb = 0; /* num of upper triangular entries in mat->B */ 1349 for (i = 0; i < m; i++) { 1350 nza += (ai[i + 1] - adiag[i]); 1351 countB = bi[i + 1] - bi[i]; 1352 bjj = bj + bi[i]; 1353 for (j = 0; j < countB; j++) { 1354 if (garray[bjj[j]] > rstart) nzb++; 1355 } 1356 } 1357 1358 nz = nza + nzb; /* total nz of upper triangular part of mat */ 1359 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1360 PetscCall(PetscMalloc1(nz, &val)); 1361 mumps->nnz = nz; 1362 mumps->irn = row; 1363 mumps->jcn = col; 1364 mumps->val = mumps->val_alloc = val; 1365 } else { 1366 val = mumps->val; 1367 } 1368 1369 jj = 0; 1370 irow = rstart; 1371 for (i = 0; i < m; i++) { 1372 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 1373 v1 = av + adiag[i]; 1374 countA = ai[i + 1] - adiag[i]; 1375 countB = bi[i + 1] - bi[i]; 1376 bjj = bj + bi[i]; 1377 v2 = bv + bi[i]; 1378 1379 /* A-part */ 1380 for (j = 0; j < countA; j++) { 1381 if (reuse == MAT_INITIAL_MATRIX) { 1382 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1383 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1384 } 1385 val[jj++] = v1[j]; 1386 } 1387 1388 /* B-part */ 1389 for (j = 0; j < countB; j++) { 1390 if (garray[bjj[j]] > rstart) { 1391 if (reuse == MAT_INITIAL_MATRIX) { 1392 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1393 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1394 } 1395 val[jj++] = v2[j]; 1396 } 1397 } 1398 irow++; 1399 } 1400 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1401 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1402 PetscFunctionReturn(PETSC_SUCCESS); 1403 } 1404 1405 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1406 { 1407 const PetscScalar *av; 1408 const PetscInt M = A->rmap->n; 1409 PetscCount i; 1410 PetscMUMPSInt *row, *col; 1411 Vec v; 1412 1413 PetscFunctionBegin; 1414 PetscCall(MatDiagonalGetDiagonal(A, &v)); 1415 PetscCall(VecGetArrayRead(v, &av)); 1416 if (reuse == MAT_INITIAL_MATRIX) { 1417 PetscCall(PetscMalloc2(M, &row, M, &col)); 1418 for (i = 0; i < M; i++) { 1419 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1420 col[i] = row[i]; 1421 } 1422 mumps->val = (PetscScalar *)av; 1423 mumps->irn = row; 1424 mumps->jcn = col; 1425 mumps->nnz = M; 1426 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1427 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1428 PetscCall(VecRestoreArrayRead(v, &av)); 1429 PetscFunctionReturn(PETSC_SUCCESS); 1430 } 1431 1432 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1433 { 1434 PetscScalar *v; 1435 const PetscInt m = A->rmap->n, N = A->cmap->N; 1436 PetscInt lda; 1437 PetscCount i, j; 1438 PetscMUMPSInt *row, *col; 1439 1440 PetscFunctionBegin; 1441 PetscCall(MatDenseGetArray(A, &v)); 1442 PetscCall(MatDenseGetLDA(A, &lda)); 1443 if (reuse == MAT_INITIAL_MATRIX) { 1444 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1445 for (i = 0; i < m; i++) { 1446 col[i] = 0; 1447 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1448 } 1449 for (j = 1; j < N; j++) { 1450 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1451 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1452 } 1453 if (lda == m) mumps->val = v; 1454 else { 1455 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1456 mumps->val_alloc = mumps->val; 1457 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1458 } 1459 mumps->irn = row; 1460 mumps->jcn = col; 1461 mumps->nnz = m * N; 1462 } else { 1463 if (lda == m && !mumps->nest_vals) mumps->val = v; 1464 else { 1465 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1466 } 1467 } 1468 PetscCall(MatDenseRestoreArray(A, &v)); 1469 PetscFunctionReturn(PETSC_SUCCESS); 1470 } 1471 1472 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 1473 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 1474 // and its rows and columns permuted 1475 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 1476 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 1477 { 1478 Mat A; 1479 PetscScalar s[2]; 1480 PetscBool isTrans, isHTrans, compare; 1481 1482 PetscFunctionBegin; 1483 do { 1484 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1485 if (isTrans) { 1486 PetscCall(MatTransposeGetMat(*sub, &A)); 1487 isHTrans = PETSC_FALSE; 1488 } else { 1489 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1490 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 1491 } 1492 compare = (PetscBool)(isTrans || isHTrans); 1493 if (compare) { 1494 if (vshift && vscale) { 1495 PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1496 if (!*conjugate) { 1497 *vshift += s[0] * *vscale; 1498 *vscale *= s[1]; 1499 } else { 1500 *vshift += PetscConj(s[0]) * *vscale; 1501 *vscale *= PetscConj(s[1]); 1502 } 1503 } 1504 if (swap) *swap = (PetscBool)!*swap; 1505 if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 1506 *sub = A; 1507 } 1508 } while (compare); 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1513 { 1514 Mat **mats; 1515 PetscInt nr, nc; 1516 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1517 1518 PetscFunctionBegin; 1519 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1520 if (reuse == MAT_INITIAL_MATRIX) { 1521 PetscMUMPSInt *irns, *jcns; 1522 PetscScalar *vals; 1523 PetscCount totnnz, cumnnz, maxnnz; 1524 PetscInt *pjcns_w, Mbs = 0; 1525 IS *rows, *cols; 1526 PetscInt **rows_idx, **cols_idx; 1527 1528 cumnnz = 0; 1529 maxnnz = 0; 1530 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1531 for (PetscInt r = 0; r < nr; r++) { 1532 for (PetscInt c = 0; c < nc; c++) { 1533 Mat sub = mats[r][c]; 1534 1535 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1536 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1537 if (sub) { 1538 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1539 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 1540 MatInfo info; 1541 1542 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 1543 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1544 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1545 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1546 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1547 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1548 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1549 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1550 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1551 1552 if (chol) { 1553 if (r == c) { 1554 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1555 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1556 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1557 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1558 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1559 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1560 } else { 1561 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1562 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1563 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1564 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1565 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1566 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1567 } 1568 } else { 1569 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1570 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1571 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1572 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1573 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1574 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1575 } 1576 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1577 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1578 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1579 cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 1580 maxnnz = PetscMax(maxnnz, info.nz_used); 1581 } 1582 } 1583 } 1584 1585 /* Allocate total COO */ 1586 totnnz = cumnnz; 1587 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1588 PetscCall(PetscMalloc1(totnnz, &vals)); 1589 1590 /* Handle rows and column maps 1591 We directly map rows and use an SF for the columns */ 1592 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1593 PetscCall(MatNestGetISs(A, rows, cols)); 1594 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1595 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1596 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1597 else (void)maxnnz; 1598 1599 cumnnz = 0; 1600 for (PetscInt r = 0; r < nr; r++) { 1601 for (PetscInt c = 0; c < nc; c++) { 1602 Mat sub = mats[r][c]; 1603 const PetscInt *ridx = rows_idx[r]; 1604 const PetscInt *cidx = cols_idx[c]; 1605 PetscScalar vscale = 1.0, vshift = 0.0; 1606 PetscInt rst, size, bs; 1607 PetscSF csf; 1608 PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 1609 PetscLayout cmap; 1610 PetscInt innz; 1611 1612 mumps->nest_vals_start[r * nc + c] = cumnnz; 1613 if (c == r) { 1614 PetscCall(ISGetSize(rows[r], &size)); 1615 if (!mumps->nest_convert_to_triples[r * nc + c]) { 1616 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1617 } 1618 PetscCall(MatGetBlockSize(sub, &bs)); 1619 Mbs += size / bs; 1620 } 1621 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1622 1623 /* Extract inner blocks if needed */ 1624 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 1625 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1626 1627 /* Get column layout to map off-process columns */ 1628 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1629 1630 /* Get row start to map on-process rows */ 1631 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1632 1633 /* Directly use the mumps datastructure and use C ordering for now */ 1634 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1635 1636 /* Swap the role of rows and columns indices for transposed blocks 1637 since we need values with global final ordering */ 1638 if (swap) { 1639 cidx = rows_idx[r]; 1640 ridx = cols_idx[c]; 1641 } 1642 1643 /* Communicate column indices 1644 This could have been done with a single SF but it would have complicated the code a lot. 1645 But since we do it only once, we pay the price of setting up an SF for each block */ 1646 if (PetscDefined(USE_64BIT_INDICES)) { 1647 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1648 } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 1649 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1650 PetscCall(PetscIntCast(mumps->nnz, &innz)); 1651 PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1652 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1653 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1654 PetscCall(PetscSFDestroy(&csf)); 1655 1656 /* Import indices: use direct map for rows and mapped indices for columns */ 1657 if (swap) { 1658 for (PetscInt k = 0; k < mumps->nnz; k++) { 1659 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1660 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1661 } 1662 } else { 1663 for (PetscInt k = 0; k < mumps->nnz; k++) { 1664 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1665 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1666 } 1667 } 1668 1669 /* Import values to full COO */ 1670 if (conjugate) { /* conjugate the entries */ 1671 PetscScalar *v = vals + cumnnz; 1672 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 1673 } else if (vscale != 1.0) { 1674 PetscScalar *v = vals + cumnnz; 1675 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 1676 } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1677 1678 /* Shift new starting point and sanity check */ 1679 cumnnz += mumps->nnz; 1680 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1681 1682 /* Free scratch memory */ 1683 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1684 PetscCall(PetscFree(mumps->val_alloc)); 1685 mumps->val = NULL; 1686 mumps->nnz = 0; 1687 } 1688 } 1689 if (mumps->id.ICNTL(15) == 1) { 1690 if (Mbs != A->rmap->N) { 1691 PetscMPIInt rank, size; 1692 1693 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1694 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1695 if (rank == 0) { 1696 PetscInt shift = 0; 1697 1698 PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 1699 PetscCall(PetscFree(mumps->id.blkptr)); 1700 PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 1701 mumps->id.blkptr[0] = 1; 1702 for (PetscInt i = 0; i < size; ++i) { 1703 for (PetscInt r = 0; r < nr; r++) { 1704 Mat sub = mats[r][r]; 1705 const PetscInt *ranges; 1706 PetscInt bs; 1707 1708 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1709 PetscCall(MatGetOwnershipRanges(sub, &ranges)); 1710 PetscCall(MatGetBlockSize(sub, &bs)); 1711 for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 1712 shift += (ranges[i + 1] - ranges[i]) / bs; 1713 } 1714 } 1715 } 1716 } else mumps->id.ICNTL(15) = 0; 1717 } 1718 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1719 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1720 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1721 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1722 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1723 mumps->nest_vals_start[nr * nc] = cumnnz; 1724 1725 /* Set pointers for final MUMPS data structure */ 1726 mumps->nest_vals = vals; 1727 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1728 mumps->val = vals; 1729 mumps->irn = irns; 1730 mumps->jcn = jcns; 1731 mumps->nnz = cumnnz; 1732 } else { 1733 PetscScalar *oval = mumps->nest_vals; 1734 for (PetscInt r = 0; r < nr; r++) { 1735 for (PetscInt c = 0; c < nc; c++) { 1736 PetscBool conjugate = PETSC_FALSE; 1737 Mat sub = mats[r][c]; 1738 PetscScalar vscale = 1.0, vshift = 0.0; 1739 PetscInt midx = r * nc + c; 1740 1741 if (!mumps->nest_convert_to_triples[midx]) continue; 1742 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL)); 1743 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1744 mumps->val = oval + mumps->nest_vals_start[midx]; 1745 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1746 if (conjugate) { 1747 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1748 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]); 1749 } else if (vscale != 1.0) { 1750 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1751 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale; 1752 } 1753 } 1754 } 1755 mumps->val = oval; 1756 } 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1761 { 1762 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1763 1764 PetscFunctionBegin; 1765 PetscCall(PetscFree(mumps->id.isol_loc)); 1766 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1767 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1768 PetscCall(VecDestroy(&mumps->b_seq)); 1769 PetscCall(VecDestroy(&mumps->x_seq)); 1770 PetscCall(PetscFree(mumps->id.perm_in)); 1771 PetscCall(PetscFree(mumps->id.blkvar)); 1772 PetscCall(PetscFree(mumps->id.blkptr)); 1773 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1774 PetscCall(PetscFree(mumps->val_alloc)); 1775 PetscCall(PetscFree(mumps->info)); 1776 PetscCall(PetscFree(mumps->ICNTL_pre)); 1777 PetscCall(PetscFree(mumps->CNTL_pre)); 1778 PetscCall(MatMumpsResetSchur_Private(mumps)); 1779 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1780 mumps->id.job = JOB_END; 1781 PetscMUMPS_c(mumps); 1782 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1783 if (mumps->mumps_comm != MPI_COMM_NULL) { 1784 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1785 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1786 } 1787 } 1788 PetscCall(MatMumpsFreeInternalID(&mumps->id)); 1789 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1790 if (mumps->use_petsc_omp_support) { 1791 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1792 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1793 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1794 } 1795 #endif 1796 PetscCall(PetscFree(mumps->ia_alloc)); 1797 PetscCall(PetscFree(mumps->ja_alloc)); 1798 PetscCall(PetscFree(mumps->recvcount)); 1799 PetscCall(PetscFree(mumps->reqs)); 1800 PetscCall(PetscFree(mumps->irhs_loc)); 1801 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1802 PetscCall(PetscFree(mumps->nest_vals)); 1803 PetscCall(PetscFree(A->data)); 1804 1805 /* clear composed functions */ 1806 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1807 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1808 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1809 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1810 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1811 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1812 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1813 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1814 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1815 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1816 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1817 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1818 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1819 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1820 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL)); 1821 PetscFunctionReturn(PETSC_SUCCESS); 1822 } 1823 1824 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1825 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1826 { 1827 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1828 const PetscMPIInt ompsize = mumps->omp_comm_size; 1829 PetscInt i, m, M, rstart; 1830 1831 PetscFunctionBegin; 1832 PetscCall(MatGetSize(A, &M, NULL)); 1833 PetscCall(MatGetLocalSize(A, &m, NULL)); 1834 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1835 if (ompsize == 1) { 1836 if (!mumps->irhs_loc) { 1837 mumps->nloc_rhs = (PetscMUMPSInt)m; 1838 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1839 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1840 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */ 1841 } 1842 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1843 } else { 1844 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1845 const PetscInt *ranges; 1846 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1847 MPI_Group petsc_group, omp_group; 1848 PetscScalar *recvbuf = NULL; 1849 1850 if (mumps->is_omp_master) { 1851 /* Lazily initialize the omp stuff for distributed rhs */ 1852 if (!mumps->irhs_loc) { 1853 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1854 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1855 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1856 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1857 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1858 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1859 1860 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1861 mumps->nloc_rhs = 0; 1862 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1863 for (j = 0; j < ompsize; j++) { 1864 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1865 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1866 } 1867 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1868 for (j = k = 0; j < ompsize; j++) { 1869 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */ 1870 } 1871 1872 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1873 PetscCallMPI(MPI_Group_free(&petsc_group)); 1874 PetscCallMPI(MPI_Group_free(&omp_group)); 1875 } 1876 1877 /* Realloc buffers when current nrhs is bigger than what we have met */ 1878 if (nrhs > mumps->max_nrhs) { 1879 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1880 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1881 mumps->max_nrhs = nrhs; 1882 } 1883 1884 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1885 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1886 mumps->rhs_disps[0] = 0; 1887 for (j = 1; j < ompsize; j++) { 1888 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1889 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1890 } 1891 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1892 } 1893 1894 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1895 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1896 1897 if (mumps->is_omp_master) { 1898 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1899 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1900 for (j = 0; j < ompsize; j++) { 1901 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1902 dst = dstbase; 1903 for (i = 0; i < nrhs; i++) { 1904 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1905 src += mumps->rhs_nrow[j]; 1906 dst += mumps->nloc_rhs; 1907 } 1908 dstbase += mumps->rhs_nrow[j]; 1909 } 1910 } 1911 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1912 } 1913 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1914 } 1915 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1916 mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 1917 mumps->id.lrhs_loc = mumps->nloc_rhs; 1918 mumps->id.irhs_loc = mumps->irhs_loc; 1919 PetscFunctionReturn(PETSC_SUCCESS); 1920 } 1921 1922 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1923 { 1924 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1925 const PetscScalar *barray = NULL; 1926 PetscScalar *array; 1927 IS is_iden, is_petsc; 1928 PetscInt i; 1929 PetscBool second_solve = PETSC_FALSE; 1930 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1931 1932 PetscFunctionBegin; 1933 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1934 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1935 &cite1)); 1936 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1937 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1938 &cite2)); 1939 1940 PetscCall(VecFlag(x, A->factorerrortype)); 1941 if (A->factorerrortype) { 1942 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 mumps->id.nrhs = 1; 1947 if (mumps->petsc_size > 1) { 1948 if (mumps->ICNTL20 == 10) { 1949 mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */ 1950 PetscCall(VecGetArrayRead(b, &barray)); 1951 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray)); 1952 } else { 1953 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/ 1954 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1955 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1956 if (!mumps->myid) { 1957 PetscCall(VecGetArray(mumps->b_seq, &array)); 1958 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1959 } 1960 } 1961 } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */ 1962 mumps->id.ICNTL(20) = 0; 1963 PetscCall(VecCopy(b, x)); 1964 PetscCall(VecGetArray(x, &array)); 1965 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1966 } 1967 1968 /* 1969 handle condensation step of Schur complement (if any) 1970 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1971 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1972 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1973 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1974 */ 1975 if (mumps->id.size_schur > 0) { 1976 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1977 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1978 second_solve = PETSC_TRUE; 1979 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 1980 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1981 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1982 } 1983 1984 mumps->id.job = JOB_SOLVE; 1985 PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs 1986 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1987 1988 /* handle expansion step of Schur complement (if any) */ 1989 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1990 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 1991 PetscCall(MatMumpsSolveSchur_Private(A)); 1992 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i); 1993 } 1994 1995 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1996 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1997 /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1998 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1999 } 2000 if (!mumps->scat_sol) { /* create scatter scat_sol */ 2001 PetscInt *isol2_loc = NULL; 2002 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 2003 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 2004 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 2005 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 2006 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 2007 PetscCall(ISDestroy(&is_iden)); 2008 PetscCall(ISDestroy(&is_petsc)); 2009 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 2010 } 2011 2012 PetscScalar *xarray; 2013 PetscCall(VecGetArray(mumps->x_seq, &xarray)); 2014 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray)); 2015 PetscCall(VecRestoreArray(mumps->x_seq, &xarray)); 2016 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2017 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2018 2019 if (mumps->ICNTL20 == 10) { // distributed RHS 2020 PetscCall(VecRestoreArrayRead(b, &barray)); 2021 } else if (!mumps->myid) { // centralized RHS 2022 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 2023 } 2024 } else { 2025 // id.rhs has the solution in mumps precision 2026 PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array)); 2027 PetscCall(VecRestoreArray(x, &array)); 2028 } 2029 2030 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 2035 { 2036 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2037 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2038 2039 PetscFunctionBegin; 2040 mumps->id.ICNTL(9) = 0; 2041 PetscCall(MatSolve_MUMPS(A, b, x)); 2042 mumps->id.ICNTL(9) = value; 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 2047 { 2048 Mat Bt = NULL; 2049 PetscBool denseX, denseB, flg, flgT; 2050 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2051 PetscInt i, nrhs, M, nrhsM; 2052 PetscScalar *array; 2053 const PetscScalar *barray; 2054 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 2055 PetscMUMPSInt *isol_loc, *isol_loc_save; 2056 PetscScalar *sol_loc; 2057 void *sol_loc_save; 2058 PetscCount sol_loc_len_save; 2059 IS is_to, is_from; 2060 PetscInt k, proc, j, m, myrstart; 2061 const PetscInt *rstart; 2062 Vec v_mpi, msol_loc; 2063 VecScatter scat_sol; 2064 Vec b_seq; 2065 VecScatter scat_rhs; 2066 PetscScalar *aa; 2067 PetscInt spnr, *ia, *ja; 2068 Mat_MPIAIJ *b = NULL; 2069 2070 PetscFunctionBegin; 2071 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 2072 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 2073 2074 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 2075 2076 if (denseB) { 2077 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 2078 mumps->id.ICNTL(20) = 0; /* dense RHS */ 2079 } else { /* sparse B */ 2080 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 2081 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 2082 PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 2083 PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 2084 /* input B is transpose of actual RHS matrix, 2085 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 2086 PetscCall(MatTransposeGetMat(B, &Bt)); 2087 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 2088 } 2089 2090 PetscCall(MatGetSize(B, &M, &nrhs)); 2091 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 2092 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 2093 mumps->id.lrhs = (PetscMUMPSInt)M; 2094 2095 if (mumps->petsc_size == 1) { // handle this easy case specially and return early 2096 PetscScalar *aa; 2097 PetscInt spnr, *ia, *ja; 2098 PetscBool second_solve = PETSC_FALSE; 2099 2100 PetscCall(MatDenseGetArray(X, &array)); 2101 if (denseB) { 2102 /* copy B to X */ 2103 PetscCall(MatDenseGetArrayRead(B, &barray)); 2104 PetscCall(PetscArraycpy(array, barray, nrhsM)); 2105 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2106 } else { /* sparse B */ 2107 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 2108 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2109 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2110 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2111 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2112 } 2113 PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2114 2115 /* handle condensation step of Schur complement (if any) */ 2116 if (mumps->id.size_schur > 0) { 2117 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 2118 second_solve = PETSC_TRUE; 2119 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 2120 mumps->id.ICNTL(26) = 1; /* condensation phase, i.e, to solve id.redrhs */ 2121 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 2122 } 2123 2124 mumps->id.job = JOB_SOLVE; 2125 PetscMUMPS_c(mumps); 2126 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2127 2128 /* handle expansion step of Schur complement (if any) */ 2129 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 2130 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 2131 PetscCall(MatMumpsSolveSchur_Private(A)); 2132 for (j = 0; j < nrhs; ++j) 2133 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs); 2134 } 2135 2136 if (!denseB) { /* sparse B, restore ia, ja */ 2137 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 2138 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2139 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2140 } 2141 2142 // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X. 2143 PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array)); 2144 PetscCall(MatDenseRestoreArray(X, &array)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 2149 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 2150 2151 /* create msol_loc to hold mumps local solution */ 2152 isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */ 2153 sol_loc_save = mumps->id.sol_loc; 2154 sol_loc_len_save = mumps->id.sol_loc_len; 2155 mumps->id.isol_loc = NULL; // an init state 2156 mumps->id.sol_loc = NULL; 2157 mumps->id.sol_loc_len = 0; 2158 2159 lsol_loc = mumps->id.lsol_loc; 2160 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 2161 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 2162 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2163 mumps->id.isol_loc = isol_loc; 2164 2165 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 2166 2167 if (denseB) { 2168 if (mumps->ICNTL20 == 10) { 2169 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 2170 PetscCall(MatDenseGetArrayRead(B, &barray)); 2171 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc 2172 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2173 PetscCall(MatGetLocalSize(B, &m, NULL)); 2174 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X 2175 } else { 2176 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 2177 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 2178 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 2179 0, re-arrange B into desired order, which is a local operation. 2180 */ 2181 2182 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 2183 /* wrap dense rhs matrix B into a vector v_mpi */ 2184 PetscCall(MatGetLocalSize(B, &m, NULL)); 2185 PetscCall(MatDenseGetArrayRead(B, &barray)); 2186 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2187 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2188 2189 /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */ 2190 if (!mumps->myid) { 2191 PetscInt *idx; 2192 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 2193 PetscCall(PetscMalloc1(nrhsM, &idx)); 2194 PetscCall(MatGetOwnershipRanges(B, &rstart)); 2195 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 2196 for (j = 0; j < nrhs; j++) { 2197 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 2198 } 2199 } 2200 2201 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 2202 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 2203 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 2204 } else { 2205 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 2206 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 2207 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 2208 } 2209 2210 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 2211 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2212 PetscCall(ISDestroy(&is_to)); 2213 PetscCall(ISDestroy(&is_from)); 2214 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2215 2216 if (!mumps->myid) { /* define rhs on the host */ 2217 PetscCall(VecGetArrayRead(b_seq, &barray)); 2218 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2219 PetscCall(VecRestoreArrayRead(b_seq, &barray)); 2220 } 2221 } 2222 } else { /* sparse B */ 2223 b = (Mat_MPIAIJ *)Bt->data; 2224 2225 /* wrap dense X into a vector v_mpi */ 2226 PetscCall(MatGetLocalSize(X, &m, NULL)); 2227 PetscCall(MatDenseGetArrayRead(X, &barray)); 2228 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2229 PetscCall(MatDenseRestoreArrayRead(X, &barray)); 2230 2231 if (!mumps->myid) { 2232 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 2233 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2234 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2235 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2236 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2237 } else { 2238 mumps->id.irhs_ptr = NULL; 2239 mumps->id.irhs_sparse = NULL; 2240 mumps->id.nz_rhs = 0; 2241 if (mumps->id.rhs_sparse_len) { 2242 PetscCall(PetscFree(mumps->id.rhs_sparse)); 2243 mumps->id.rhs_sparse_len = 0; 2244 } 2245 } 2246 } 2247 2248 /* solve phase */ 2249 mumps->id.job = JOB_SOLVE; 2250 PetscMUMPS_c(mumps); 2251 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2252 2253 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 2254 PetscCall(MatDenseGetArray(X, &array)); 2255 PetscCall(VecPlaceArray(v_mpi, array)); 2256 2257 /* create scatter scat_sol */ 2258 PetscCall(MatGetOwnershipRanges(X, &rstart)); 2259 /* iidx: index for scatter mumps solution to PETSc X */ 2260 2261 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 2262 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 2263 for (i = 0; i < lsol_loc; i++) { 2264 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 2265 2266 for (proc = 0; proc < mumps->petsc_size; proc++) { 2267 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 2268 myrstart = rstart[proc]; 2269 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 2270 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 2271 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 2272 break; 2273 } 2274 } 2275 2276 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 2277 } 2278 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 2279 PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[] 2280 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 2281 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2282 PetscCall(ISDestroy(&is_from)); 2283 PetscCall(ISDestroy(&is_to)); 2284 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2285 PetscCall(MatDenseRestoreArray(X, &array)); 2286 2287 if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers 2288 mumps->id.sol_loc_len = 0; 2289 PetscCall(PetscFree(mumps->id.sol_loc)); 2290 } 2291 2292 // restore old values 2293 mumps->id.sol_loc = sol_loc_save; 2294 mumps->id.sol_loc_len = sol_loc_len_save; 2295 mumps->id.isol_loc = isol_loc_save; 2296 2297 PetscCall(PetscFree2(sol_loc, isol_loc)); 2298 PetscCall(PetscFree(idxx)); 2299 PetscCall(VecDestroy(&msol_loc)); 2300 PetscCall(VecDestroy(&v_mpi)); 2301 if (!denseB) { 2302 if (!mumps->myid) { 2303 b = (Mat_MPIAIJ *)Bt->data; 2304 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 2305 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2306 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2307 } 2308 } else { 2309 if (mumps->ICNTL20 == 0) { 2310 PetscCall(VecDestroy(&b_seq)); 2311 PetscCall(VecScatterDestroy(&scat_rhs)); 2312 } 2313 } 2314 PetscCall(VecScatterDestroy(&scat_sol)); 2315 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2316 PetscFunctionReturn(PETSC_SUCCESS); 2317 } 2318 2319 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 2320 { 2321 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2322 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2323 2324 PetscFunctionBegin; 2325 mumps->id.ICNTL(9) = 0; 2326 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2327 mumps->id.ICNTL(9) = value; 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 2332 { 2333 PetscBool flg; 2334 Mat B; 2335 2336 PetscFunctionBegin; 2337 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2338 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 2339 2340 /* Create B=Bt^T that uses Bt's data structure */ 2341 PetscCall(MatCreateTranspose(Bt, &B)); 2342 2343 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2344 PetscCall(MatDestroy(&B)); 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } 2347 2348 #if !defined(PETSC_USE_COMPLEX) 2349 /* 2350 input: 2351 F: numeric factor 2352 output: 2353 nneg: total number of negative pivots 2354 nzero: total number of zero pivots 2355 npos: (global dimension of F) - nneg - nzero 2356 */ 2357 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 2358 { 2359 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2360 PetscMPIInt size; 2361 2362 PetscFunctionBegin; 2363 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 2364 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 2365 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 2366 2367 if (nneg) *nneg = mumps->id.INFOG(12); 2368 if (nzero || npos) { 2369 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2370 if (nzero) *nzero = mumps->id.INFOG(28); 2371 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 2372 } 2373 PetscFunctionReturn(PETSC_SUCCESS); 2374 } 2375 #endif 2376 2377 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 2378 { 2379 PetscMPIInt nreqs; 2380 PetscMUMPSInt *irn, *jcn; 2381 PetscMPIInt count; 2382 PetscCount totnnz, remain; 2383 const PetscInt osize = mumps->omp_comm_size; 2384 PetscScalar *val; 2385 2386 PetscFunctionBegin; 2387 if (osize > 1) { 2388 if (reuse == MAT_INITIAL_MATRIX) { 2389 /* master first gathers counts of nonzeros to receive */ 2390 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 2391 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 2392 2393 /* Then each computes number of send/recvs */ 2394 if (mumps->is_omp_master) { 2395 /* Start from 1 since self communication is not done in MPI */ 2396 nreqs = 0; 2397 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 2398 } else { 2399 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 2400 } 2401 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 2402 2403 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 2404 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 2405 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 2406 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 2407 */ 2408 nreqs = 0; /* counter for actual send/recvs */ 2409 if (mumps->is_omp_master) { 2410 totnnz = 0; 2411 2412 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 2413 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 2414 PetscCall(PetscMalloc1(totnnz, &val)); 2415 2416 /* Self communication */ 2417 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 2418 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 2419 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 2420 2421 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 2422 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 2423 PetscCall(PetscFree(mumps->val_alloc)); 2424 mumps->nnz = totnnz; 2425 mumps->irn = irn; 2426 mumps->jcn = jcn; 2427 mumps->val = mumps->val_alloc = val; 2428 2429 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 2430 jcn += mumps->recvcount[0]; 2431 val += mumps->recvcount[0]; 2432 2433 /* Remote communication */ 2434 for (PetscMPIInt i = 1; i < osize; i++) { 2435 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2436 remain = mumps->recvcount[i] - count; 2437 while (count > 0) { 2438 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2439 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2440 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2441 irn += count; 2442 jcn += count; 2443 val += count; 2444 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2445 remain -= count; 2446 } 2447 } 2448 } else { 2449 irn = mumps->irn; 2450 jcn = mumps->jcn; 2451 val = mumps->val; 2452 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2453 remain = mumps->nnz - count; 2454 while (count > 0) { 2455 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2456 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2457 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2458 irn += count; 2459 jcn += count; 2460 val += count; 2461 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2462 remain -= count; 2463 } 2464 } 2465 } else { 2466 nreqs = 0; 2467 if (mumps->is_omp_master) { 2468 val = mumps->val + mumps->recvcount[0]; 2469 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 2470 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2471 remain = mumps->recvcount[i] - count; 2472 while (count > 0) { 2473 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2474 val += count; 2475 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2476 remain -= count; 2477 } 2478 } 2479 } else { 2480 val = mumps->val; 2481 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2482 remain = mumps->nnz - count; 2483 while (count > 0) { 2484 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2485 val += count; 2486 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2487 remain -= count; 2488 } 2489 } 2490 } 2491 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2492 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 2493 } 2494 PetscFunctionReturn(PETSC_SUCCESS); 2495 } 2496 2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2498 { 2499 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2500 2501 PetscFunctionBegin; 2502 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 2503 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2504 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2505 PetscFunctionReturn(PETSC_SUCCESS); 2506 } 2507 2508 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2509 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2510 2511 /* numerical factorization phase */ 2512 mumps->id.job = JOB_FACTNUMERIC; 2513 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2514 if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2515 } else { 2516 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2517 } 2518 2519 if (F->schur) { 2520 const PetscScalar *array; 2521 MUMPS_INT size = mumps->id.size_schur; 2522 PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2523 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2524 PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2525 } 2526 2527 if (mumps->id.ICNTL(22)) PetscCall(PetscStrncpy(mumps->id.ooc_prefix, ((PetscObject)F)->prefix, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_prefix))); 2528 2529 PetscMUMPS_c(mumps); 2530 if (mumps->id.INFOG(1) < 0) { 2531 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2532 if (mumps->id.INFOG(1) == -10) { 2533 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2534 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2535 } else if (mumps->id.INFOG(1) == -13) { 2536 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2537 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2538 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2539 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2540 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2541 } else { 2542 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2543 F->factorerrortype = MAT_FACTOR_OTHER; 2544 } 2545 } 2546 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2547 2548 F->assembled = PETSC_TRUE; 2549 2550 if (F->schur) { /* reset Schur status to unfactored */ 2551 #if defined(PETSC_HAVE_CUDA) 2552 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2553 #endif 2554 PetscScalar *array; 2555 PetscCall(MatDenseGetArray(F->schur, &array)); 2556 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2557 PetscCall(MatDenseRestoreArray(F->schur, &array)); 2558 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2559 mumps->id.ICNTL(19) = 2; 2560 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2561 } 2562 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2563 } 2564 2565 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2566 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2567 2568 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2569 // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2570 // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2571 // which the solution (in SOL_loc) is available on the local processor. 2572 // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2573 // normally have the same contents for each of these calls. The only exception is the case of 2574 // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2575 // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 2576 if (mumps->petsc_size > 1) { 2577 PetscInt lsol_loc; 2578 PetscScalar *array; 2579 2580 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2581 if (mumps->x_seq) { 2582 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2583 PetscCall(PetscFree(mumps->id.isol_loc)); 2584 PetscCall(VecDestroy(&mumps->x_seq)); 2585 } 2586 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2587 PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2588 PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2589 PetscCall(VecGetArray(mumps->x_seq, &array)); 2590 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2591 PetscCall(VecRestoreArray(mumps->x_seq, &array)); 2592 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2593 } 2594 PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 2595 PetscFunctionReturn(PETSC_SUCCESS); 2596 } 2597 2598 /* Sets MUMPS options from the options database */ 2599 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2600 { 2601 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2602 PetscReal cntl; 2603 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2604 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2605 PetscBool flg = PETSC_FALSE; 2606 PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2607 void *arr; 2608 2609 PetscFunctionBegin; 2610 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2611 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2612 PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2613 PetscInt nthreads = 0; 2614 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2615 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2616 PetscMUMPSInt nblk, *blkvar, *blkptr; 2617 2618 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2619 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2620 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2621 2622 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2623 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2624 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2625 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2626 if (mumps->use_petsc_omp_support) { 2627 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2628 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2629 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2630 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2631 #else 2632 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2633 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2634 #endif 2635 } else { 2636 mumps->omp_comm = PETSC_COMM_SELF; 2637 mumps->mumps_comm = mumps->petsc_comm; 2638 mumps->is_omp_master = PETSC_TRUE; 2639 } 2640 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2641 mumps->reqs = NULL; 2642 mumps->tag = 0; 2643 2644 if (mumps->mumps_comm != MPI_COMM_NULL) { 2645 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2646 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2647 MPI_Comm comm; 2648 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2649 mumps->mumps_comm = comm; 2650 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2651 } 2652 2653 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2654 mumps->id.job = JOB_INIT; 2655 mumps->id.par = 1; /* host participates factorizaton and solve */ 2656 mumps->id.sym = mumps->sym; 2657 2658 size = mumps->id.size_schur; 2659 arr = mumps->id.schur; 2660 listvar_schur = mumps->id.listvar_schur; 2661 nblk = mumps->id.nblk; 2662 blkvar = mumps->id.blkvar; 2663 blkptr = mumps->id.blkptr; 2664 if (PetscDefined(USE_DEBUG)) { 2665 for (PetscInt i = 0; i < size; i++) 2666 PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 2667 A->rmap->N); 2668 } 2669 2670 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2671 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2672 PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar"); 2673 PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2674 2675 PetscMUMPS_c(mumps); 2676 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2677 2678 /* set PETSc-MUMPS default options - override MUMPS default */ 2679 mumps->id.ICNTL(3) = 0; 2680 mumps->id.ICNTL(4) = 0; 2681 if (mumps->petsc_size == 1) { 2682 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2683 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2684 } else { 2685 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2686 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2687 } 2688 if (nblk && blkptr) { 2689 mumps->id.ICNTL(15) = 1; 2690 mumps->id.nblk = nblk; 2691 mumps->id.blkvar = blkvar; 2692 mumps->id.blkptr = blkptr; 2693 } else mumps->id.ICNTL(15) = 0; 2694 2695 /* restore cached ICNTL and CNTL values */ 2696 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2697 for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2698 2699 PetscCall(PetscFree(mumps->ICNTL_pre)); 2700 PetscCall(PetscFree(mumps->CNTL_pre)); 2701 2702 if (schur) { 2703 mumps->id.size_schur = size; 2704 mumps->id.schur_lld = size; 2705 mumps->id.schur = arr; 2706 mumps->id.listvar_schur = listvar_schur; 2707 if (mumps->petsc_size > 1) { 2708 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2709 2710 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2711 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2712 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2713 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2714 } else { 2715 if (F->factortype == MAT_FACTOR_LU) { 2716 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2717 } else { 2718 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2719 } 2720 } 2721 mumps->id.ICNTL(26) = -1; 2722 } 2723 2724 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2725 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2726 */ 2727 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2728 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2729 2730 mumps->scat_rhs = NULL; 2731 mumps->scat_sol = NULL; 2732 } 2733 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2734 if (flg) mumps->id.ICNTL(1) = icntl; 2735 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2736 if (flg) mumps->id.ICNTL(2) = icntl; 2737 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2738 if (flg) mumps->id.ICNTL(3) = icntl; 2739 2740 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2741 if (flg) mumps->id.ICNTL(4) = icntl; 2742 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2743 2744 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2745 if (flg) mumps->id.ICNTL(6) = icntl; 2746 2747 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2748 if (flg) { 2749 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2750 mumps->id.ICNTL(7) = icntl; 2751 } 2752 2753 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2754 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2755 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2756 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2757 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2758 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2759 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2760 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2761 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2762 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2763 if (flg) { 2764 if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2765 else if (mumps->id.ICNTL(15) > 0) { 2766 const PetscInt *bsizes; 2767 PetscInt nblocks, p, *blkptr = NULL; 2768 PetscMPIInt *recvcounts, *displs, n; 2769 PetscMPIInt rank, size = 0; 2770 2771 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2772 flg = PETSC_TRUE; 2773 for (p = 0; p < nblocks; ++p) { 2774 if (bsizes[p] > 1) break; 2775 } 2776 if (p == nblocks) flg = PETSC_FALSE; 2777 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2778 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2779 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2780 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2781 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2782 PetscCall(PetscMPIIntCast(nblocks, &n)); 2783 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2784 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2785 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2786 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2787 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2788 if (rank == 0) { 2789 blkptr[0] = 1; 2790 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2791 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2792 } 2793 PetscCall(PetscFree2(recvcounts, displs)); 2794 PetscCall(PetscFree(blkptr)); 2795 } 2796 } 2797 } 2798 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2799 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */ 2800 PetscCall(MatDestroy(&F->schur)); 2801 PetscCall(MatMumpsResetSchur_Private(mumps)); 2802 } 2803 2804 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2805 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2806 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2807 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2808 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2809 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2810 */ 2811 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2812 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2813 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2814 #endif 2815 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2816 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2817 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2818 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2819 #endif 2820 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2821 2822 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), &flg)); 2823 if (flg && mumps->id.ICNTL(22) != 1) mumps->id.ICNTL(22) = 0; // MUMPS treats values other than 1 as 0. Normalize it so we can safely use 'if (mumps->id.ICNTL(22))' 2824 if (mumps->id.ICNTL(22)) { 2825 // MUMPS will use the /tmp directory if -mat_mumps_ooc_tmpdir is not set by user. 2826 // We don't provide option -mat_mumps_ooc_prefix, as we use F's prefix as OOC_PREFIX, which is set later during MatFactorNumeric_MUMPS() to also handle cases where users enable OOC via MatMumpsSetICNTL(). 2827 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "Out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir), NULL)); 2828 } 2829 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2830 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2831 if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2832 2833 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2834 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2835 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2836 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2837 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2838 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2839 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2840 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2841 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2842 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2843 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2844 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2845 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2846 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2847 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL)); 2848 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2849 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2850 2851 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2852 if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2853 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2854 if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2855 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2856 if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2857 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2858 if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2859 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2860 if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2861 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2862 if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2863 2864 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2865 if (ninfo) { 2866 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2867 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2868 mumps->ninfo = ninfo; 2869 for (i = 0; i < ninfo; i++) { 2870 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2871 mumps->info[i] = info[i]; 2872 } 2873 } 2874 PetscOptionsEnd(); 2875 PetscFunctionReturn(PETSC_SUCCESS); 2876 } 2877 2878 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2879 { 2880 PetscFunctionBegin; 2881 if (mumps->id.INFOG(1) < 0) { 2882 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2883 if (mumps->id.INFOG(1) == -6) { 2884 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2885 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2886 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2887 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2888 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2889 } else { 2890 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2891 F->factorerrortype = MAT_FACTOR_OTHER; 2892 } 2893 } 2894 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2895 PetscFunctionReturn(PETSC_SUCCESS); 2896 } 2897 2898 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2899 { 2900 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2901 Vec b; 2902 const PetscInt M = A->rmap->N; 2903 2904 PetscFunctionBegin; 2905 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2906 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2907 PetscFunctionReturn(PETSC_SUCCESS); 2908 } 2909 2910 /* Set MUMPS options from the options database */ 2911 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2912 2913 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2914 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2915 2916 /* analysis phase */ 2917 mumps->id.job = JOB_FACTSYMBOLIC; 2918 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2919 switch (mumps->id.ICNTL(18)) { 2920 case 0: /* centralized assembled matrix input */ 2921 if (!mumps->myid) { 2922 mumps->id.nnz = mumps->nnz; 2923 mumps->id.irn = mumps->irn; 2924 mumps->id.jcn = mumps->jcn; 2925 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2926 if (r && mumps->id.ICNTL(7) == 7) { 2927 mumps->id.ICNTL(7) = 1; 2928 if (!mumps->myid) { 2929 const PetscInt *idx; 2930 PetscInt i; 2931 2932 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2933 PetscCall(ISGetIndices(r, &idx)); 2934 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2935 PetscCall(ISRestoreIndices(r, &idx)); 2936 } 2937 } 2938 } 2939 break; 2940 case 3: /* distributed assembled matrix input (size>1) */ 2941 mumps->id.nnz_loc = mumps->nnz; 2942 mumps->id.irn_loc = mumps->irn; 2943 mumps->id.jcn_loc = mumps->jcn; 2944 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2945 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2946 PetscCall(MatCreateVecs(A, NULL, &b)); 2947 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2948 PetscCall(VecDestroy(&b)); 2949 } 2950 break; 2951 } 2952 PetscMUMPS_c(mumps); 2953 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2954 2955 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2956 F->ops->solve = MatSolve_MUMPS; 2957 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2958 F->ops->matsolve = MatMatSolve_MUMPS; 2959 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2960 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2961 2962 mumps->matstruc = SAME_NONZERO_PATTERN; 2963 PetscFunctionReturn(PETSC_SUCCESS); 2964 } 2965 2966 /* Note the PETSc r and c permutations are ignored */ 2967 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2968 { 2969 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2970 Vec b; 2971 const PetscInt M = A->rmap->N; 2972 2973 PetscFunctionBegin; 2974 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2975 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2976 PetscFunctionReturn(PETSC_SUCCESS); 2977 } 2978 2979 /* Set MUMPS options from the options database */ 2980 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2981 2982 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2983 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2984 2985 /* analysis phase */ 2986 mumps->id.job = JOB_FACTSYMBOLIC; 2987 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2988 switch (mumps->id.ICNTL(18)) { 2989 case 0: /* centralized assembled matrix input */ 2990 if (!mumps->myid) { 2991 mumps->id.nnz = mumps->nnz; 2992 mumps->id.irn = mumps->irn; 2993 mumps->id.jcn = mumps->jcn; 2994 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2995 } 2996 break; 2997 case 3: /* distributed assembled matrix input (size>1) */ 2998 mumps->id.nnz_loc = mumps->nnz; 2999 mumps->id.irn_loc = mumps->irn; 3000 mumps->id.jcn_loc = mumps->jcn; 3001 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 3002 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 3003 PetscCall(MatCreateVecs(A, NULL, &b)); 3004 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 3005 PetscCall(VecDestroy(&b)); 3006 } 3007 break; 3008 } 3009 PetscMUMPS_c(mumps); 3010 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3011 3012 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 3013 F->ops->solve = MatSolve_MUMPS; 3014 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 3015 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3016 3017 mumps->matstruc = SAME_NONZERO_PATTERN; 3018 PetscFunctionReturn(PETSC_SUCCESS); 3019 } 3020 3021 /* Note the PETSc r permutation and factor info are ignored */ 3022 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 3023 { 3024 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3025 Vec b; 3026 const PetscInt M = A->rmap->N; 3027 3028 PetscFunctionBegin; 3029 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 3030 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 3031 PetscFunctionReturn(PETSC_SUCCESS); 3032 } 3033 3034 /* Set MUMPS options from the options database */ 3035 PetscCall(MatSetFromOptions_MUMPS(F, A)); 3036 3037 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 3038 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 3039 3040 /* analysis phase */ 3041 mumps->id.job = JOB_FACTSYMBOLIC; 3042 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 3043 switch (mumps->id.ICNTL(18)) { 3044 case 0: /* centralized assembled matrix input */ 3045 if (!mumps->myid) { 3046 mumps->id.nnz = mumps->nnz; 3047 mumps->id.irn = mumps->irn; 3048 mumps->id.jcn = mumps->jcn; 3049 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 3050 } 3051 break; 3052 case 3: /* distributed assembled matrix input (size>1) */ 3053 mumps->id.nnz_loc = mumps->nnz; 3054 mumps->id.irn_loc = mumps->irn; 3055 mumps->id.jcn_loc = mumps->jcn; 3056 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 3057 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 3058 PetscCall(MatCreateVecs(A, NULL, &b)); 3059 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 3060 PetscCall(VecDestroy(&b)); 3061 } 3062 break; 3063 } 3064 PetscMUMPS_c(mumps); 3065 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3066 3067 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 3068 F->ops->solve = MatSolve_MUMPS; 3069 F->ops->solvetranspose = MatSolve_MUMPS; 3070 F->ops->matsolve = MatMatSolve_MUMPS; 3071 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 3072 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3073 #if defined(PETSC_USE_COMPLEX) 3074 F->ops->getinertia = NULL; 3075 #else 3076 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 3077 #endif 3078 3079 mumps->matstruc = SAME_NONZERO_PATTERN; 3080 PetscFunctionReturn(PETSC_SUCCESS); 3081 } 3082 3083 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 3084 { 3085 PetscBool isascii; 3086 PetscViewerFormat format; 3087 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3088 3089 PetscFunctionBegin; 3090 /* check if matrix is mumps type */ 3091 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 3092 3093 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 3094 if (isascii) { 3095 PetscCall(PetscViewerGetFormat(viewer, &format)); 3096 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3097 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 3098 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3099 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 3100 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 3101 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 3102 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 3103 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 3104 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 3105 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 3106 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 3107 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 3108 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 3109 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 3110 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 3111 if (mumps->id.ICNTL(11) > 0) { 3112 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4))); 3113 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5))); 3114 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6))); 3115 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8))); 3116 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9))); 3117 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11))); 3118 } 3119 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 3120 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 3121 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 3122 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 3123 /* ICNTL(15-17) not used */ 3124 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 3125 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 3126 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 3127 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 3128 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 3129 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 3130 3131 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 3132 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 3133 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 3134 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 3135 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 3136 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 3137 3138 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 3139 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 3140 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 3141 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 3142 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 3143 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 3144 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 3145 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 3146 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49))); 3147 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 3148 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 3149 3150 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1))); 3151 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2))); 3152 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3))); 3153 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4))); 3154 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5))); 3155 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7))); 3156 3157 /* information local to each processor */ 3158 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 3159 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 3160 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1))); 3161 PetscCall(PetscViewerFlush(viewer)); 3162 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 3163 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2))); 3164 PetscCall(PetscViewerFlush(viewer)); 3165 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 3166 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3))); 3167 PetscCall(PetscViewerFlush(viewer)); 3168 3169 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 3170 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 3171 PetscCall(PetscViewerFlush(viewer)); 3172 3173 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 3174 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 3175 PetscCall(PetscViewerFlush(viewer)); 3176 3177 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 3178 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 3179 PetscCall(PetscViewerFlush(viewer)); 3180 3181 if (mumps->ninfo && mumps->ninfo <= 80) { 3182 PetscInt i; 3183 for (i = 0; i < mumps->ninfo; i++) { 3184 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 3185 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 3186 PetscCall(PetscViewerFlush(viewer)); 3187 } 3188 } 3189 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 3190 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 3191 3192 if (mumps->myid == 0) { /* information from the host */ 3193 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1))); 3194 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2))); 3195 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3))); 3196 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34))); 3197 3198 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 3199 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 3200 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 3201 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 3202 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 3203 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 3204 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 3205 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 3206 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 3207 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 3208 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 3209 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 3210 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 3211 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 3212 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 3213 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 3214 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 3215 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 3216 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 3217 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 3218 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 3219 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 3220 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 3221 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 3222 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 3223 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 3224 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 3225 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 3226 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 3227 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 3228 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 3229 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 3230 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 3231 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 3232 } 3233 } 3234 } 3235 PetscFunctionReturn(PETSC_SUCCESS); 3236 } 3237 3238 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 3239 { 3240 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3241 3242 PetscFunctionBegin; 3243 info->block_size = 1.0; 3244 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3245 info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3246 info->nz_unneeded = 0.0; 3247 info->assemblies = 0.0; 3248 info->mallocs = 0.0; 3249 info->memory = 0.0; 3250 info->fill_ratio_given = 0; 3251 info->fill_ratio_needed = 0; 3252 info->factor_mallocs = 0; 3253 PetscFunctionReturn(PETSC_SUCCESS); 3254 } 3255 3256 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 3257 { 3258 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3259 const PetscInt *idxs; 3260 PetscInt size, i; 3261 3262 PetscFunctionBegin; 3263 PetscCall(ISGetLocalSize(is, &size)); 3264 /* Schur complement matrix */ 3265 PetscCall(MatDestroy(&F->schur)); 3266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 3267 // don't allocate mumps->id.schur[] now as its precision is yet to know 3268 PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 3269 PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 3270 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 3271 3272 /* MUMPS expects Fortran style indices */ 3273 PetscCall(PetscFree(mumps->id.listvar_schur)); 3274 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 3275 PetscCall(ISGetIndices(is, &idxs)); 3276 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 3277 PetscCall(ISRestoreIndices(is, &idxs)); 3278 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 3279 if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; 3280 else mumps->ICNTL26 = -1; 3281 PetscFunctionReturn(PETSC_SUCCESS); 3282 } 3283 3284 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 3285 { 3286 Mat St; 3287 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3288 PetscScalar *array; 3289 PetscInt i, j, N = mumps->id.size_schur; 3290 3291 PetscFunctionBegin; 3292 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 3293 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 3294 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 3295 PetscCall(MatSetType(St, MATDENSE)); 3296 PetscCall(MatSetUp(St)); 3297 PetscCall(MatDenseGetArray(St, &array)); 3298 if (!mumps->sym) { /* MUMPS always return a full matrix */ 3299 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 3300 for (i = 0; i < N; i++) { 3301 for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3302 } 3303 } else { /* stored by columns */ 3304 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3305 } 3306 } else { /* either full or lower-triangular (not packed) */ 3307 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 3308 for (i = 0; i < N; i++) { 3309 for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3310 } 3311 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 3312 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3313 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 3314 for (i = 0; i < N; i++) { 3315 for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3316 } 3317 } 3318 } 3319 PetscCall(MatDenseRestoreArray(St, &array)); 3320 *S = St; 3321 PetscFunctionReturn(PETSC_SUCCESS); 3322 } 3323 3324 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 3325 { 3326 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3327 3328 PetscFunctionBegin; 3329 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 3330 PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 3331 for (i = 0; i < nICNTL_pre; ++i) 3332 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 3333 if (i == nICNTL_pre) { /* not already cached */ 3334 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 3335 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 3336 mumps->ICNTL_pre[0]++; 3337 } 3338 mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 3339 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 3340 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 3341 PetscFunctionReturn(PETSC_SUCCESS); 3342 } 3343 3344 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 3345 { 3346 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3347 3348 PetscFunctionBegin; 3349 if (mumps->id.job == JOB_NULL) { 3350 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 3351 *ival = 0; 3352 for (i = 0; i < nICNTL_pre; ++i) { 3353 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 3354 } 3355 } else *ival = mumps->id.ICNTL(icntl); 3356 PetscFunctionReturn(PETSC_SUCCESS); 3357 } 3358 3359 /*@ 3360 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3361 3362 Logically Collective 3363 3364 Input Parameters: 3365 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3366 . icntl - index of MUMPS parameter array `ICNTL()` 3367 - ival - value of MUMPS `ICNTL(icntl)` 3368 3369 Options Database Key: 3370 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 3371 3372 Level: beginner 3373 3374 Note: 3375 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3376 3377 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3378 @*/ 3379 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 3380 { 3381 PetscFunctionBegin; 3382 PetscValidType(F, 1); 3383 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3384 PetscValidLogicalCollectiveInt(F, icntl, 2); 3385 PetscValidLogicalCollectiveInt(F, ival, 3); 3386 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3387 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 3388 PetscFunctionReturn(PETSC_SUCCESS); 3389 } 3390 3391 /*@ 3392 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3393 3394 Logically Collective 3395 3396 Input Parameters: 3397 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3398 - icntl - index of MUMPS parameter array ICNTL() 3399 3400 Output Parameter: 3401 . ival - value of MUMPS ICNTL(icntl) 3402 3403 Level: beginner 3404 3405 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3406 @*/ 3407 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 3408 { 3409 PetscFunctionBegin; 3410 PetscValidType(F, 1); 3411 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3412 PetscValidLogicalCollectiveInt(F, icntl, 2); 3413 PetscAssertPointer(ival, 3); 3414 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3415 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3416 PetscFunctionReturn(PETSC_SUCCESS); 3417 } 3418 3419 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 3420 { 3421 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3422 3423 PetscFunctionBegin; 3424 if (mumps->id.job == JOB_NULL) { 3425 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3426 for (i = 0; i < nCNTL_pre; ++i) 3427 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3428 if (i == nCNTL_pre) { 3429 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3430 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3431 mumps->CNTL_pre[0]++; 3432 } 3433 mumps->CNTL_pre[1 + 2 * i] = icntl; 3434 mumps->CNTL_pre[2 + 2 * i] = val; 3435 } else ID_CNTL_SET(mumps->id, icntl, val); 3436 PetscFunctionReturn(PETSC_SUCCESS); 3437 } 3438 3439 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3440 { 3441 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3442 3443 PetscFunctionBegin; 3444 if (mumps->id.job == JOB_NULL) { 3445 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3446 *val = 0.0; 3447 for (i = 0; i < nCNTL_pre; ++i) { 3448 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 3449 } 3450 } else *val = ID_CNTL_GET(mumps->id, icntl); 3451 PetscFunctionReturn(PETSC_SUCCESS); 3452 } 3453 3454 /*@C 3455 MatMumpsSetOocTmpDir - Set MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc> 3456 3457 Logically Collective 3458 3459 Input Parameters: 3460 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`. 3461 - tmpdir - temporary directory for out-of-core facility. 3462 3463 Level: beginner 3464 3465 Note: 3466 To make it effective, this routine must be called before the numeric factorization, i.e., `PCSetUp()`. 3467 If `ooc_tmpdir` is not set, MUMPS will also check the environment variable `MUMPS_OOC_TMPDIR`. But if neither was defined, it will use /tmp by default. 3468 3469 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3470 @*/ 3471 PetscErrorCode MatMumpsSetOocTmpDir(Mat F, const char *tmpdir) 3472 { 3473 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3474 3475 PetscFunctionBegin; 3476 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 3477 PetscValidType(F, 1); 3478 PetscCall(PetscStrncpy(mumps->id.ooc_tmpdir, tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir))); 3479 PetscFunctionReturn(PETSC_SUCCESS); 3480 } 3481 3482 /*@C 3483 MatMumpsGetOocTmpDir - Get MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc> 3484 3485 Logically Collective 3486 3487 Input Parameter: 3488 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`. 3489 3490 Output Parameter: 3491 . tmpdir - temporary directory for out-of-core facility. 3492 3493 Level: beginner 3494 3495 Note: 3496 The returned string is read-only and user should not try to change it. 3497 3498 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3499 @*/ 3500 PetscErrorCode MatMumpsGetOocTmpDir(Mat F, const char **tmpdir) 3501 { 3502 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3503 3504 PetscFunctionBegin; 3505 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 3506 PetscValidType(F, 1); 3507 if (tmpdir) *tmpdir = mumps->id.ooc_tmpdir; 3508 PetscFunctionReturn(PETSC_SUCCESS); 3509 } 3510 3511 /*@ 3512 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3513 3514 Logically Collective 3515 3516 Input Parameters: 3517 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3518 . icntl - index of MUMPS parameter array `CNTL()` 3519 - val - value of MUMPS `CNTL(icntl)` 3520 3521 Options Database Key: 3522 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3523 3524 Level: beginner 3525 3526 Note: 3527 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3528 3529 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3530 @*/ 3531 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3532 { 3533 PetscFunctionBegin; 3534 PetscValidType(F, 1); 3535 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3536 PetscValidLogicalCollectiveInt(F, icntl, 2); 3537 PetscValidLogicalCollectiveReal(F, val, 3); 3538 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3539 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3540 PetscFunctionReturn(PETSC_SUCCESS); 3541 } 3542 3543 /*@ 3544 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3545 3546 Logically Collective 3547 3548 Input Parameters: 3549 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3550 - icntl - index of MUMPS parameter array CNTL() 3551 3552 Output Parameter: 3553 . val - value of MUMPS CNTL(icntl) 3554 3555 Level: beginner 3556 3557 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3558 @*/ 3559 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3560 { 3561 PetscFunctionBegin; 3562 PetscValidType(F, 1); 3563 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3564 PetscValidLogicalCollectiveInt(F, icntl, 2); 3565 PetscAssertPointer(val, 3); 3566 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3567 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3568 PetscFunctionReturn(PETSC_SUCCESS); 3569 } 3570 3571 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3572 { 3573 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3574 3575 PetscFunctionBegin; 3576 *info = mumps->id.INFO(icntl); 3577 PetscFunctionReturn(PETSC_SUCCESS); 3578 } 3579 3580 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3581 { 3582 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3583 3584 PetscFunctionBegin; 3585 *infog = mumps->id.INFOG(icntl); 3586 PetscFunctionReturn(PETSC_SUCCESS); 3587 } 3588 3589 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3590 { 3591 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3592 3593 PetscFunctionBegin; 3594 *rinfo = ID_RINFO_GET(mumps->id, icntl); 3595 PetscFunctionReturn(PETSC_SUCCESS); 3596 } 3597 3598 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3599 { 3600 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3601 3602 PetscFunctionBegin; 3603 *rinfog = ID_RINFOG_GET(mumps->id, icntl); 3604 PetscFunctionReturn(PETSC_SUCCESS); 3605 } 3606 3607 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3608 { 3609 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3610 3611 PetscFunctionBegin; 3612 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3613 *size = 0; 3614 *array = NULL; 3615 if (!mumps->myid) { 3616 *size = mumps->id.INFOG(28); 3617 PetscCall(PetscMalloc1(*size, array)); 3618 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3619 } 3620 PetscFunctionReturn(PETSC_SUCCESS); 3621 } 3622 3623 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3624 { 3625 Mat Bt = NULL, Btseq = NULL; 3626 PetscBool flg; 3627 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3628 PetscScalar *aa; 3629 PetscInt spnr, *ia, *ja, M, nrhs; 3630 3631 PetscFunctionBegin; 3632 PetscAssertPointer(spRHS, 2); 3633 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3634 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3635 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3636 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3637 3638 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3639 3640 if (mumps->petsc_size > 1) { 3641 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3642 Btseq = b->A; 3643 } else { 3644 Btseq = Bt; 3645 } 3646 3647 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3648 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3649 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3650 mumps->id.rhs = NULL; 3651 3652 if (!mumps->myid) { 3653 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3654 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3655 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3656 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3657 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3658 } else { 3659 mumps->id.irhs_ptr = NULL; 3660 mumps->id.irhs_sparse = NULL; 3661 mumps->id.nz_rhs = 0; 3662 if (mumps->id.rhs_sparse_len) { 3663 PetscCall(PetscFree(mumps->id.rhs_sparse)); 3664 mumps->id.rhs_sparse_len = 0; 3665 } 3666 } 3667 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3668 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3669 3670 /* solve phase */ 3671 mumps->id.job = JOB_SOLVE; 3672 PetscMUMPS_c(mumps); 3673 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3674 3675 if (!mumps->myid) { 3676 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3677 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3678 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3679 } 3680 PetscFunctionReturn(PETSC_SUCCESS); 3681 } 3682 3683 /*@ 3684 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3685 3686 Logically Collective 3687 3688 Input Parameter: 3689 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3690 3691 Output Parameter: 3692 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3693 3694 Level: beginner 3695 3696 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3697 @*/ 3698 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3699 { 3700 PetscFunctionBegin; 3701 PetscValidType(F, 1); 3702 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3703 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3704 PetscFunctionReturn(PETSC_SUCCESS); 3705 } 3706 3707 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3708 { 3709 Mat spRHS; 3710 3711 PetscFunctionBegin; 3712 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3713 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3714 PetscCall(MatDestroy(&spRHS)); 3715 PetscFunctionReturn(PETSC_SUCCESS); 3716 } 3717 3718 /*@ 3719 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3720 3721 Logically Collective 3722 3723 Input Parameter: 3724 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3725 3726 Output Parameter: 3727 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3728 3729 Level: beginner 3730 3731 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3732 @*/ 3733 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3734 { 3735 PetscBool flg; 3736 3737 PetscFunctionBegin; 3738 PetscValidType(F, 1); 3739 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3740 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3741 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3742 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3743 PetscFunctionReturn(PETSC_SUCCESS); 3744 } 3745 3746 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3747 { 3748 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3749 3750 PetscFunctionBegin; 3751 if (nblk) { 3752 PetscAssertPointer(blkptr, 4); 3753 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3754 PetscCall(PetscFree(mumps->id.blkptr)); 3755 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3756 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3757 // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3758 // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 3759 // mumps->id.ICNTL(15) = 1; 3760 if (blkvar) { 3761 PetscCall(PetscFree(mumps->id.blkvar)); 3762 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3763 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3764 } 3765 } else { 3766 PetscCall(PetscFree(mumps->id.blkptr)); 3767 PetscCall(PetscFree(mumps->id.blkvar)); 3768 // mumps->id.ICNTL(15) = 0; 3769 mumps->id.nblk = 0; 3770 } 3771 PetscFunctionReturn(PETSC_SUCCESS); 3772 } 3773 3774 /*@ 3775 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3776 3777 Not collective, only relevant on the first process of the MPI communicator 3778 3779 Input Parameters: 3780 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3781 . nblk - the number of blocks 3782 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3783 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3784 3785 Level: advanced 3786 3787 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3788 @*/ 3789 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3790 { 3791 PetscFunctionBegin; 3792 PetscValidType(F, 1); 3793 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3794 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3795 PetscFunctionReturn(PETSC_SUCCESS); 3796 } 3797 3798 /*@ 3799 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3800 3801 Logically Collective 3802 3803 Input Parameters: 3804 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3805 - icntl - index of MUMPS parameter array INFO() 3806 3807 Output Parameter: 3808 . ival - value of MUMPS INFO(icntl) 3809 3810 Level: beginner 3811 3812 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3813 @*/ 3814 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3815 { 3816 PetscFunctionBegin; 3817 PetscValidType(F, 1); 3818 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3819 PetscAssertPointer(ival, 3); 3820 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3821 PetscFunctionReturn(PETSC_SUCCESS); 3822 } 3823 3824 /*@ 3825 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3826 3827 Logically Collective 3828 3829 Input Parameters: 3830 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3831 - icntl - index of MUMPS parameter array INFOG() 3832 3833 Output Parameter: 3834 . ival - value of MUMPS INFOG(icntl) 3835 3836 Level: beginner 3837 3838 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3839 @*/ 3840 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3841 { 3842 PetscFunctionBegin; 3843 PetscValidType(F, 1); 3844 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3845 PetscAssertPointer(ival, 3); 3846 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3847 PetscFunctionReturn(PETSC_SUCCESS); 3848 } 3849 3850 /*@ 3851 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3852 3853 Logically Collective 3854 3855 Input Parameters: 3856 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3857 - icntl - index of MUMPS parameter array RINFO() 3858 3859 Output Parameter: 3860 . val - value of MUMPS RINFO(icntl) 3861 3862 Level: beginner 3863 3864 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3865 @*/ 3866 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3867 { 3868 PetscFunctionBegin; 3869 PetscValidType(F, 1); 3870 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3871 PetscAssertPointer(val, 3); 3872 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3873 PetscFunctionReturn(PETSC_SUCCESS); 3874 } 3875 3876 /*@ 3877 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3878 3879 Logically Collective 3880 3881 Input Parameters: 3882 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3883 - icntl - index of MUMPS parameter array RINFOG() 3884 3885 Output Parameter: 3886 . val - value of MUMPS RINFOG(icntl) 3887 3888 Level: beginner 3889 3890 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3891 @*/ 3892 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3893 { 3894 PetscFunctionBegin; 3895 PetscValidType(F, 1); 3896 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3897 PetscAssertPointer(val, 3); 3898 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3899 PetscFunctionReturn(PETSC_SUCCESS); 3900 } 3901 3902 /*@ 3903 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3904 3905 Logically Collective 3906 3907 Input Parameter: 3908 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3909 3910 Output Parameters: 3911 + size - local size of the array. The size of the array is non-zero only on MPI rank 0 3912 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3913 for freeing this array. 3914 3915 Level: beginner 3916 3917 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3918 @*/ 3919 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3920 { 3921 PetscFunctionBegin; 3922 PetscValidType(F, 1); 3923 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3924 PetscAssertPointer(size, 2); 3925 PetscAssertPointer(array, 3); 3926 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3927 PetscFunctionReturn(PETSC_SUCCESS); 3928 } 3929 3930 /*MC 3931 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3932 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3933 3934 Works with `MATAIJ` and `MATSBAIJ` matrices 3935 3936 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3937 3938 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3939 See details below. 3940 3941 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3942 3943 Options Database Keys: 3944 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3945 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3946 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3947 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3948 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3949 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3950 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3951 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3952 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3953 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3954 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3955 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3956 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3957 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3958 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3959 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3960 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3961 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3962 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3963 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3964 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3965 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3966 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3967 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3968 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3969 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3970 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3971 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3972 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3973 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3974 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3975 . -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase 3976 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3977 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3978 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3979 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3980 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3981 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3982 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3983 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3984 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3985 3986 Level: beginner 3987 3988 Notes: 3989 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3990 error if the matrix is Hermitian. 3991 3992 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3993 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3994 3995 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3996 the failure with 3997 .vb 3998 KSPGetPC(ksp,&pc); 3999 PCFactorGetMatrix(pc,&mat); 4000 MatMumpsGetInfo(mat,....); 4001 MatMumpsGetInfog(mat,....); etc. 4002 .ve 4003 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 4004 4005 MUMPS provides 64-bit integer support in two build modes: 4006 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 4007 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 4008 4009 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 4010 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 4011 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 4012 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 4013 4014 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 4015 4016 Two modes to run MUMPS/PETSc with OpenMP 4017 .vb 4018 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 4019 threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpiexec -n 4 ./test". 4020 .ve 4021 4022 .vb 4023 `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example, 4024 if a compute node has 32 cores and you run on two nodes, you may use "mpiexec -n 64 ./test -mat_mumps_use_omp_threads 16" 4025 .ve 4026 4027 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 4028 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 4029 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 4030 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 4031 (PETSc will automatically try to utilized a threaded BLAS if `--with-openmp` is provided). 4032 4033 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 4034 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 4035 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 4036 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 4037 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 4038 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 4039 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 4040 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 4041 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 4042 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 4043 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 4044 examine the mapping result. 4045 4046 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 4047 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 4048 calls `omp_set_num_threads`(m) internally before calling MUMPS. 4049 4050 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 4051 4052 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 4053 M*/ 4054 4055 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 4056 { 4057 PetscFunctionBegin; 4058 *type = MATSOLVERMUMPS; 4059 PetscFunctionReturn(PETSC_SUCCESS); 4060 } 4061 4062 /* MatGetFactor for Seq and MPI AIJ matrices */ 4063 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 4064 { 4065 Mat B; 4066 Mat_MUMPS *mumps; 4067 PetscBool isSeqAIJ, isDiag, isDense; 4068 PetscMPIInt size; 4069 4070 PetscFunctionBegin; 4071 if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4072 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4073 *F = NULL; 4074 PetscFunctionReturn(PETSC_SUCCESS); 4075 } 4076 /* Create the factorization matrix */ 4077 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4078 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 4079 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4080 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4081 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4082 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4083 PetscCall(MatSetUp(B)); 4084 4085 PetscCall(PetscNew(&mumps)); 4086 4087 B->ops->view = MatView_MUMPS; 4088 B->ops->getinfo = MatGetInfo_MUMPS; 4089 4090 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4091 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4092 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4093 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4094 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4095 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4096 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4097 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4098 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4099 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4100 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4101 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4102 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4103 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4105 4106 if (ftype == MAT_FACTOR_LU) { 4107 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4108 B->factortype = MAT_FACTOR_LU; 4109 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4110 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4111 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4112 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 4113 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4114 mumps->sym = 0; 4115 } else { 4116 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4117 B->factortype = MAT_FACTOR_CHOLESKY; 4118 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4119 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4120 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4121 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 4122 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4123 if (PetscDefined(USE_COMPLEX)) mumps->sym = 2; 4124 else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4125 else mumps->sym = 2; 4126 } 4127 4128 /* set solvertype */ 4129 PetscCall(PetscFree(B->solvertype)); 4130 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4131 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4132 if (size == 1) { 4133 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4134 B->canuseordering = PETSC_TRUE; 4135 } 4136 B->ops->destroy = MatDestroy_MUMPS; 4137 B->data = (void *)mumps; 4138 4139 *F = B; 4140 mumps->id.job = JOB_NULL; 4141 mumps->ICNTL_pre = NULL; 4142 mumps->CNTL_pre = NULL; 4143 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4144 PetscFunctionReturn(PETSC_SUCCESS); 4145 } 4146 4147 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4148 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4149 { 4150 Mat B; 4151 Mat_MUMPS *mumps; 4152 PetscBool isSeqSBAIJ; 4153 PetscMPIInt size; 4154 4155 PetscFunctionBegin; 4156 if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4157 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4158 *F = NULL; 4159 PetscFunctionReturn(PETSC_SUCCESS); 4160 } 4161 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4162 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4163 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4164 PetscCall(MatSetUp(B)); 4165 4166 PetscCall(PetscNew(&mumps)); 4167 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4168 if (isSeqSBAIJ) { 4169 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4170 } else { 4171 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4172 } 4173 4174 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4175 B->ops->view = MatView_MUMPS; 4176 B->ops->getinfo = MatGetInfo_MUMPS; 4177 4178 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4179 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4180 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4181 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4182 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4183 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4184 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4185 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4187 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4193 4194 B->factortype = MAT_FACTOR_CHOLESKY; 4195 if (PetscDefined(USE_COMPLEX)) mumps->sym = 2; 4196 else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4197 else mumps->sym = 2; 4198 4199 /* set solvertype */ 4200 PetscCall(PetscFree(B->solvertype)); 4201 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4202 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4203 if (size == 1) { 4204 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4205 B->canuseordering = PETSC_TRUE; 4206 } 4207 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4208 B->ops->destroy = MatDestroy_MUMPS; 4209 B->data = (void *)mumps; 4210 4211 *F = B; 4212 mumps->id.job = JOB_NULL; 4213 mumps->ICNTL_pre = NULL; 4214 mumps->CNTL_pre = NULL; 4215 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4216 PetscFunctionReturn(PETSC_SUCCESS); 4217 } 4218 4219 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 4220 { 4221 Mat B; 4222 Mat_MUMPS *mumps; 4223 PetscBool isSeqBAIJ; 4224 PetscMPIInt size; 4225 4226 PetscFunctionBegin; 4227 /* Create the factorization matrix */ 4228 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 4229 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4230 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4231 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4232 PetscCall(MatSetUp(B)); 4233 4234 PetscCall(PetscNew(&mumps)); 4235 PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4236 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4237 B->factortype = MAT_FACTOR_LU; 4238 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4239 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4240 mumps->sym = 0; 4241 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4242 4243 B->ops->view = MatView_MUMPS; 4244 B->ops->getinfo = MatGetInfo_MUMPS; 4245 4246 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4247 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4248 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4250 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4251 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4252 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4253 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4254 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4255 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4256 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4257 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4258 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4259 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4260 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4261 4262 /* set solvertype */ 4263 PetscCall(PetscFree(B->solvertype)); 4264 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4265 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4266 if (size == 1) { 4267 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4268 B->canuseordering = PETSC_TRUE; 4269 } 4270 B->ops->destroy = MatDestroy_MUMPS; 4271 B->data = (void *)mumps; 4272 4273 *F = B; 4274 mumps->id.job = JOB_NULL; 4275 mumps->ICNTL_pre = NULL; 4276 mumps->CNTL_pre = NULL; 4277 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4278 PetscFunctionReturn(PETSC_SUCCESS); 4279 } 4280 4281 /* MatGetFactor for Seq and MPI SELL matrices */ 4282 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4283 { 4284 Mat B; 4285 Mat_MUMPS *mumps; 4286 PetscBool isSeqSELL; 4287 PetscMPIInt size; 4288 4289 PetscFunctionBegin; 4290 /* Create the factorization matrix */ 4291 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 4292 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4293 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4294 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4295 PetscCall(MatSetUp(B)); 4296 4297 PetscCall(PetscNew(&mumps)); 4298 4299 B->ops->view = MatView_MUMPS; 4300 B->ops->getinfo = MatGetInfo_MUMPS; 4301 4302 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4303 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4304 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4305 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4306 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4307 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4308 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4309 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4310 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4311 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4312 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4313 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4314 4315 PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4316 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4317 B->factortype = MAT_FACTOR_LU; 4318 PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4319 mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 4320 mumps->sym = 0; 4321 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4322 4323 /* set solvertype */ 4324 PetscCall(PetscFree(B->solvertype)); 4325 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4326 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4327 if (size == 1) { 4328 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4329 B->canuseordering = PETSC_TRUE; 4330 } 4331 B->ops->destroy = MatDestroy_MUMPS; 4332 B->data = (void *)mumps; 4333 4334 *F = B; 4335 mumps->id.job = JOB_NULL; 4336 mumps->ICNTL_pre = NULL; 4337 mumps->CNTL_pre = NULL; 4338 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4339 PetscFunctionReturn(PETSC_SUCCESS); 4340 } 4341 4342 /* MatGetFactor for MATNEST matrices */ 4343 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 4344 { 4345 Mat B, **mats; 4346 Mat_MUMPS *mumps; 4347 PetscInt nr, nc; 4348 PetscMPIInt size; 4349 PetscBool flg = PETSC_TRUE; 4350 4351 PetscFunctionBegin; 4352 if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4353 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4354 *F = NULL; 4355 PetscFunctionReturn(PETSC_SUCCESS); 4356 } 4357 4358 /* Return if some condition is not satisfied */ 4359 *F = NULL; 4360 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 4361 if (ftype == MAT_FACTOR_CHOLESKY) { 4362 IS *rows, *cols; 4363 PetscInt *m, *M; 4364 4365 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 4366 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 4367 PetscCall(MatNestGetISs(A, rows, cols)); 4368 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 4369 if (!flg) { 4370 PetscCall(PetscFree2(rows, cols)); 4371 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 4372 PetscFunctionReturn(PETSC_SUCCESS); 4373 } 4374 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 4375 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 4376 for (PetscInt r = 0; flg && r < nr; r++) 4377 for (PetscInt k = r + 1; flg && k < nr; k++) 4378 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 4379 PetscCall(PetscFree2(m, M)); 4380 PetscCall(PetscFree2(rows, cols)); 4381 if (!flg) { 4382 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 4383 PetscFunctionReturn(PETSC_SUCCESS); 4384 } 4385 } 4386 4387 for (PetscInt r = 0; r < nr; r++) { 4388 for (PetscInt c = 0; c < nc; c++) { 4389 Mat sub = mats[r][c]; 4390 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 4391 4392 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 4393 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 4394 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 4395 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 4396 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 4397 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 4398 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 4399 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4400 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 4401 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4402 if (ftype == MAT_FACTOR_CHOLESKY) { 4403 if (r == c) { 4404 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 4405 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4406 flg = PETSC_FALSE; 4407 } 4408 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4409 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4410 flg = PETSC_FALSE; 4411 } 4412 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4413 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 4414 flg = PETSC_FALSE; 4415 } 4416 } 4417 } 4418 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 4419 4420 /* Create the factorization matrix */ 4421 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4422 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4423 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4424 PetscCall(MatSetUp(B)); 4425 4426 PetscCall(PetscNew(&mumps)); 4427 4428 B->ops->view = MatView_MUMPS; 4429 B->ops->getinfo = MatGetInfo_MUMPS; 4430 4431 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4432 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4433 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4434 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4435 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4436 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4437 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4438 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4439 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4440 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4441 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4442 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4443 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4444 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4445 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4446 4447 if (ftype == MAT_FACTOR_LU) { 4448 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4449 B->factortype = MAT_FACTOR_LU; 4450 mumps->sym = 0; 4451 } else { 4452 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4453 B->factortype = MAT_FACTOR_CHOLESKY; 4454 if (PetscDefined(USE_COMPLEX)) mumps->sym = 2; 4455 else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4456 else mumps->sym = 2; 4457 } 4458 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 4459 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 4460 4461 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4462 if (size == 1) { 4463 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4464 B->canuseordering = PETSC_TRUE; 4465 } 4466 4467 /* set solvertype */ 4468 PetscCall(PetscFree(B->solvertype)); 4469 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4470 B->ops->destroy = MatDestroy_MUMPS; 4471 B->data = (void *)mumps; 4472 4473 *F = B; 4474 mumps->id.job = JOB_NULL; 4475 mumps->ICNTL_pre = NULL; 4476 mumps->CNTL_pre = NULL; 4477 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4478 PetscFunctionReturn(PETSC_SUCCESS); 4479 } 4480 4481 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4482 { 4483 PetscFunctionBegin; 4484 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4485 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4486 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4487 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4488 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4489 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4490 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4491 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4492 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4493 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4494 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4495 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4496 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4497 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4498 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4499 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4500 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4501 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 4502 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 4503 PetscFunctionReturn(PETSC_SUCCESS); 4504 } 4505