1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 #include <petsc/private/vecimpl.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15 #include <cmumps_c.h> 16 #include <zmumps_c.h> 17 #include <smumps_c.h> 18 #include <dmumps_c.h> 19 #else 20 #if defined(PETSC_USE_COMPLEX) 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <cmumps_c.h> 23 #define MUMPS_c cmumps_c 24 #define MUMPS_STRUC_C CMUMPS_STRUC_C 25 #define MumpsScalar CMUMPS_COMPLEX 26 #else 27 #include <zmumps_c.h> 28 #define MUMPS_c zmumps_c 29 #define MUMPS_STRUC_C ZMUMPS_STRUC_C 30 #define MumpsScalar ZMUMPS_COMPLEX 31 #endif 32 #else 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #include <smumps_c.h> 35 #define MUMPS_c smumps_c 36 #define MUMPS_STRUC_C SMUMPS_STRUC_C 37 #define MumpsScalar SMUMPS_REAL 38 #else 39 #include <dmumps_c.h> 40 #define MUMPS_c dmumps_c 41 #define MUMPS_STRUC_C DMUMPS_STRUC_C 42 #define MumpsScalar DMUMPS_REAL 43 #endif 44 #endif 45 #endif 46 EXTERN_C_END 47 48 #define JOB_INIT -1 49 #define JOB_NULL 0 50 #define JOB_FACTSYMBOLIC 1 51 #define JOB_FACTNUMERIC 2 52 #define JOB_SOLVE 3 53 #define JOB_END -2 54 55 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 56 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 57 naming convention in PetscMPIInt, PetscBLASInt etc. 
58 */ 59 typedef MUMPS_INT PetscMUMPSInt; 60 61 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 62 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 63 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 64 #endif 65 #else 66 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 67 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 68 #endif 69 #endif 70 71 #define MPIU_MUMPSINT MPI_INT 72 #define PETSC_MUMPS_INT_MAX 2147483647 73 #define PETSC_MUMPS_INT_MIN -2147483648 74 75 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 76 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 77 { 78 PetscFunctionBegin; 79 #if PetscDefined(USE_64BIT_INDICES) 80 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 81 #endif 82 *b = (PetscMUMPSInt)a; 83 PetscFunctionReturn(PETSC_SUCCESS); 84 } 85 86 /* Put these utility routines here since they are only used in this file */ 87 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 88 { 89 PetscInt myval; 90 PetscBool myset; 91 92 PetscFunctionBegin; 93 /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); // only narrow to PetscMUMPSInt when the option was actually set
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

// An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
//
// With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
// Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
// arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
//
// Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
// Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
// Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
typedef struct {
  PetscPrecision precision;   // precision used by MUMPS
  void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

  // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
  MUMPS_INT  sym, par, job;
  MUMPS_INT  comm_fortran; /* Fortran communicator */
  MUMPS_INT *icntl;
  void      *cntl; // MumpsReal, fixed size array
  MUMPS_INT  n;
  MUMPS_INT  nblk;

  /* Assembled entry */
  MUMPS_INT8 nnz;
  MUMPS_INT *irn;
  MUMPS_INT *jcn;
  void      *a; // MumpsScalar, centralized input
  PetscCount a_len;

  /* Distributed entry */
  MUMPS_INT8 nnz_loc;
  MUMPS_INT *irn_loc;
  MUMPS_INT *jcn_loc;
  void      *a_loc; // MumpsScalar, distributed input
  PetscCount a_loc_len;

  /* Matrix by blocks */
  MUMPS_INT *blkptr;
  MUMPS_INT *blkvar;

  /* Ordering, if given by user */
  MUMPS_INT *perm_in;

  /* RHS, solution, output data and statistics */
  void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
  PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

  MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
  MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
  // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
  MUMPS_INT  schur_lld;
  MUMPS_INT *info, *infog;   // fixed size array
  void      *rinfo, *rinfog; // MumpsReal, fixed size array

  /* Null space */
  MUMPS_INT *pivnul_list; // allocated by MUMPS!
  MUMPS_INT *mapping;     // allocated by MUMPS!
  /* Schur */
  MUMPS_INT  size_schur;
  MUMPS_INT *listvar_schur;
  void      *schur; // MumpsScalar
  PetscCount schur_len;

  /* For out-of-core */
  char *ooc_tmpdir; // fixed size array
  char *ooc_prefix; // fixed size array
} XMUMPS_STRUC_C;

// Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct.
// (Name is misspelled — "Allocate" — but kept as-is since the macro may be referenced elsewhere in this file.)
#define AllocatInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner; \
    PetscCall(PetscNew(&inner)); \
    outer->icntl = inner->icntl; \
    outer->cntl = inner->cntl; \
    outer->info = inner->info; \
    outer->infog = inner->infog; \
    outer->rinfo = inner->rinfo; \
    outer->rinfog = inner->rinfog; \
    outer->ooc_tmpdir = inner->ooc_tmpdir; \
    outer->ooc_prefix = inner->ooc_prefix; \
    /* these three fields should never change after init */ \
    inner->comm_fortran = outer->comm_fortran; \
    inner->par = outer->par; \
    inner->sym = outer->sym; \
    outer->internal_id = inner; \
  } while (0)

// Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
{
  PetscFunctionBegin;
  outer->precision = precision;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
  else AllocatInternalID(ZMUMPS_STRUC_C, outer);
  #else
  if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
  else AllocatInternalID(DMUMPS_STRUC_C, outer);
  #endif
#else
  // no mixed-precision support: MUMPS_STRUC_C (see the top of this file) is the type matching PetscScalar
  AllocatInternalID(MUMPS_STRUC_C, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Free the MumpsScalar buffers held inside the inner struct (only allocated separately when running mixed precision)
#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
    PetscCall(PetscFree(inner->a)); \
    PetscCall(PetscFree(inner->a_loc)); \
    PetscCall(PetscFree(inner->redrhs)); \
    PetscCall(PetscFree(inner->rhs)); \
    PetscCall(PetscFree(inner->rhs_sparse)); \
    PetscCall(PetscFree(inner->rhs_loc)); \
    PetscCall(PetscFree(inner->sol_loc)); \
    PetscCall(PetscFree(inner->schur)); \
  } while (0)

// Destroy the internal [SDCZ]MUMPS_STRUC_C ID data structure (and, in mixed precision, its companion buffers)
static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
  if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
    const PetscPrecision mumps_precision = outer->precision;
    if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
  #if defined(PETSC_USE_COMPLEX)
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
  #else
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
  #endif
    }
#endif
    PetscCall(PetscFree(outer->internal_id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
// <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
// (For brevity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
// If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
//
// The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
248 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 249 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 250 // 251 // 252 // Input parameters: 253 // + convert - whether to do PetscScalar to MumpsScalar conversion 254 // . n - length of the PetscScalar array 255 // . pa - [n]], points to the PetscScalar array 256 // . precision - precision of MumpsScalar 257 // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 258 // - ma - on input, an existing MumpsScalar array if any. 259 // 260 // Output parameters: 261 // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 262 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 263 // 264 // Note: 265 // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller. 266 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 267 { 268 PetscFunctionBegin; 269 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 270 const PetscPrecision mumps_precision = precision; 271 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). 
Must be single or double", (int)precision); 272 #if defined(PETSC_USE_COMPLEX) 273 if (mumps_precision != PETSC_SCALAR_PRECISION) { 274 if (mumps_precision == PETSC_PRECISION_SINGLE) { 275 if (*m < n) { 276 PetscCall(PetscFree(*ma)); 277 PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 278 *m = n; 279 } 280 if (convert) { 281 CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 282 for (PetscCount i = 0; i < n; i++) { 283 b[i].r = PetscRealPart(pa[i]); 284 b[i].i = PetscImaginaryPart(pa[i]); 285 } 286 } 287 } else { 288 if (*m < n) { 289 PetscCall(PetscFree(*ma)); 290 PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 291 *m = n; 292 } 293 if (convert) { 294 ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 295 for (PetscCount i = 0; i < n; i++) { 296 b[i].r = PetscRealPart(pa[i]); 297 b[i].i = PetscImaginaryPart(pa[i]); 298 } 299 } 300 } 301 } 302 #else 303 if (mumps_precision != PETSC_SCALAR_PRECISION) { 304 if (mumps_precision == PETSC_PRECISION_SINGLE) { 305 if (*m < n) { 306 PetscCall(PetscFree(*ma)); 307 PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 308 *m = n; 309 } 310 if (convert) { 311 SMUMPS_REAL *b = *(SMUMPS_REAL **)ma; 312 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 313 } 314 } else { 315 if (*m < n) { 316 PetscCall(PetscFree(*ma)); 317 PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 318 *m = n; 319 } 320 if (convert) { 321 DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 322 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 323 } 324 } 325 } 326 #endif 327 else 328 #endif 329 { 330 if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 331 *ma = (void *)pa; // same precision, make them alias 332 *m = 0; 333 } 334 PetscFunctionReturn(PETSC_SUCCESS); 335 } 336 337 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 338 // 339 // 1) If the two types are different, cast array elements. 340 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
341 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 342 { 343 PetscFunctionBegin; 344 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 345 if (mumps_precision != PETSC_SCALAR_PRECISION) { 346 #if defined(PETSC_USE_COMPLEX) 347 if (mumps_precision == PETSC_PRECISION_SINGLE) { 348 PetscReal *a = (PetscReal *)pa; 349 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 350 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 351 } else { 352 PetscReal *a = (PetscReal *)pa; 353 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 354 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 355 } 356 #else 357 if (mumps_precision == PETSC_PRECISION_SINGLE) { 358 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 360 } else { 361 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 362 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 363 } 364 #endif 365 } else 366 #endif 367 PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 368 PetscFunctionReturn(PETSC_SUCCESS); 369 } 370 371 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 372 // 373 // 1) If the two types are different, cast array elements. 374 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      // NOTE(review): writes through <ma> despite the const qualifier on the parameter; callers pass writable buffers
      CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    } else {
      ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    } else {
      DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// MPI datatype matching the real type of the precision MUMPS is running in (used e.g. to Bcast rinfo/rinfog)
static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
{
  return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
}

// Copy the handful of integers/pointers from the outer (abstract) struct into the inner (precision-specific)
// struct right before calling MUMPS; the MumpsScalar buffers are cast from void * to the concrete type
#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    inner->job = outer->job; \
    inner->n = outer->n; \
    inner->nblk = outer->nblk; \
    inner->nnz = outer->nnz; \
    inner->irn = outer->irn; \
    inner->jcn = outer->jcn; \
    inner->a = (mumpsscalar *)outer->a; \
    inner->nnz_loc = outer->nnz_loc; \
    inner->irn_loc = outer->irn_loc; \
    inner->jcn_loc = outer->jcn_loc; \
    inner->a_loc = (mumpsscalar *)outer->a_loc; \
    inner->blkptr = outer->blkptr; \
    inner->blkvar = outer->blkvar; \
    inner->perm_in = outer->perm_in; \
    inner->rhs = (mumpsscalar *)outer->rhs; \
    inner->redrhs = (mumpsscalar *)outer->redrhs; \
    inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \
    inner->sol_loc = (mumpsscalar *)outer->sol_loc; \
    inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \
    inner->irhs_sparse = outer->irhs_sparse; \
    inner->irhs_ptr = outer->irhs_ptr; \
    inner->isol_loc = outer->isol_loc; \
    inner->irhs_loc = outer->irhs_loc; \
    inner->nrhs = outer->nrhs; \
    inner->lrhs = outer->lrhs; \
    inner->lredrhs = outer->lredrhs; \
    inner->nz_rhs = outer->nz_rhs; \
    inner->lsol_loc = outer->lsol_loc; \
    inner->nloc_rhs = outer->nloc_rhs; \
    inner->lrhs_loc = outer->lrhs_loc; \
    inner->schur_lld = outer->schur_lld; \
    inner->size_schur = outer->size_schur; \
    inner->listvar_schur = outer->listvar_schur; \
    inner->schur = (mumpsscalar *)outer->schur; \
  } while (0)

// Copy back, after a MUMPS call, the pointers that MUMPS itself allocates
#define PostMumpsCall(inner, outer) \
  do { \
    outer->pivnul_list = inner->pivnul_list; \
    outer->mapping = inner->mapping; \
  } while (0)

// Entry for PETSc to call mumps
static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    CMUMPS_STRUC_C *inner =
(CMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
    PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
    PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #else
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, SMUMPS_REAL);
    PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, DMUMPS_REAL);
    PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #endif
#else
  // no mixed precision: MUMPS_STRUC_C/MUMPS_c are the type/function matching PetscScalar (see the top of this file)
  MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
  PreMumpsCall(inner, outer, MumpsScalar);
  PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
  PostMumpsCall(inner, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I) icntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I)  info[(I) - 1]

// Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
#if defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
#else
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
#endif

// Get a value from MumpsReal arrays. The value is convertible to PetscReal.
#define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
#define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
#define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

// Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
  } while (0)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscCall(PetscCallMumps_Private(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
           to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
           an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
           omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
        */ \
        SMUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscCall(PetscCallMumps_Private(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscCall(PetscCallMumps_Private(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

// Private data of the PETSc/MUMPS interface; hangs off Mat->data of the factor matrix
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  XMUMPS_STRUC_C id;

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  ICNTL26;
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt matches PetscMUMPSInt, so the input arrays can be passed through unchanged */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release all Schur-complement related state held by the interface */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id has not been built yet
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X; // solve S*X = B; all three matrices are dense
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  const PetscScalar   *xarray;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  // bring the reduced rhs from MumpsScalar (id.redrhs) into the PetscScalar buffer (mumps->redrhs) that B wraps
  PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    /* S holds the inverted Schur complement, so the solve is a product X = S*B (or S^T*B) */
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Before (expansion == PETSC_FALSE) / after (expansion == PETSC_TRUE) the MUMPS solve, manage the reduced rhs
   used by the Schur complement feature (ICNTL(19)); no-op when no Schur complement was requested */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      mumps->id.lredrhs = mumps->id.size_schur;
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      // id.redrhs is only a separate allocation when redrhs_len != 0 (mixed precision); otherwise it aliases mumps->redrhs
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else {                                      /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F));   /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                    /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

/* SEQAIJ -> COO triples; row/col allocated once, val aliases the AIJ value array (no copy) */
static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscCount         nz, rnz, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (PetscCount i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (PetscCount j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* SEQSELL -> COO triples; row index is recovered from the slice structure (slices of height 8) */
static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      /* r cycles 0..7 inside each slice; global row = 8*slice + r */
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* SEQBAIJ -> COO triples; each bs x bs block is expanded into bs2 scalar entries */
static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 873 { 874 const PetscInt *ai, *aj, *ajj; 875 PetscInt bs; 876 PetscCount nz, rnz, i, j, k, m; 877 PetscMUMPSInt *row, *col; 878 PetscScalar *val; 879 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 880 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 881 #if defined(PETSC_USE_COMPLEX) 882 PetscBool isset, hermitian; 883 #endif 884 885 PetscFunctionBegin; 886 #if defined(PETSC_USE_COMPLEX) 887 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 888 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 889 #endif 890 ai = aa->i; 891 aj = aa->j; 892 PetscCall(MatGetBlockSize(A, &bs)); 893 if (reuse == MAT_INITIAL_MATRIX) { 894 const PetscCount alloc_size = aa->nz * bs2; 895 896 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 897 if (bs > 1) { 898 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 899 mumps->val = mumps->val_alloc; 900 } else { 901 mumps->val = aa->a; 902 } 903 mumps->irn = row; 904 mumps->jcn = col; 905 } else { 906 row = mumps->irn; 907 col = mumps->jcn; 908 } 909 val = mumps->val; 910 911 nz = 0; 912 if (bs > 1) { 913 for (i = 0; i < mbs; i++) { 914 rnz = ai[i + 1] - ai[i]; 915 ajj = aj + ai[i]; 916 for (j = 0; j < rnz; j++) { 917 for (k = 0; k < bs; k++) { 918 for (m = 0; m < bs; m++) { 919 if (ajj[j] > i || k >= m) { 920 if (reuse == MAT_INITIAL_MATRIX) { 921 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 922 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 923 } 924 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 925 } 926 } 927 } 928 } 929 } 930 } else if (reuse == MAT_INITIAL_MATRIX) { 931 for (i = 0; i < mbs; i++) { 932 rnz = ai[i + 1] - ai[i]; 933 ajj = aj + ai[i]; 934 for (j = 0; j < rnz; j++) { 935 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 936 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 
937 nz++; 938 } 939 } 940 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 941 } else if (mumps->nest_vals) 942 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 943 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 944 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 945 PetscFunctionReturn(PETSC_SUCCESS); 946 } 947 948 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 949 { 950 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 951 PetscCount nz, rnz, i, j; 952 const PetscScalar *av, *v1; 953 PetscScalar *val; 954 PetscMUMPSInt *row, *col; 955 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 956 PetscBool diagDense; 957 #if defined(PETSC_USE_COMPLEX) 958 PetscBool hermitian, isset; 959 #endif 960 961 PetscFunctionBegin; 962 #if defined(PETSC_USE_COMPLEX) 963 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 964 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 965 #endif 966 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 967 ai = aa->i; 968 aj = aa->j; 969 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 970 if (reuse == MAT_INITIAL_MATRIX) { 971 /* count nz in the upper triangular part of A */ 972 nz = 0; 973 if (!diagDense) { 974 for (i = 0; i < M; i++) { 975 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 976 for (j = ai[i]; j < ai[i + 1]; j++) { 977 if (aj[j] < i) continue; 978 nz++; 979 } 980 } else { 981 nz += ai[i + 1] - adiag[i]; 982 } 983 } 984 } else { 985 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 986 } 987 
PetscCall(PetscMalloc2(nz, &row, nz, &col)); 988 PetscCall(PetscMalloc1(nz, &val)); 989 mumps->nnz = nz; 990 mumps->irn = row; 991 mumps->jcn = col; 992 mumps->val = mumps->val_alloc = val; 993 994 nz = 0; 995 if (!diagDense) { 996 for (i = 0; i < M; i++) { 997 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 998 for (j = ai[i]; j < ai[i + 1]; j++) { 999 if (aj[j] < i) continue; 1000 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1001 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 1002 val[nz] = av[j]; 1003 nz++; 1004 } 1005 } else { 1006 rnz = ai[i + 1] - adiag[i]; 1007 ajj = aj + adiag[i]; 1008 v1 = av + adiag[i]; 1009 for (j = 0; j < rnz; j++) { 1010 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1011 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1012 val[nz++] = v1[j]; 1013 } 1014 } 1015 } 1016 } else { 1017 for (i = 0; i < M; i++) { 1018 rnz = ai[i + 1] - adiag[i]; 1019 ajj = aj + adiag[i]; 1020 v1 = av + adiag[i]; 1021 for (j = 0; j < rnz; j++) { 1022 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1023 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1024 val[nz++] = v1[j]; 1025 } 1026 } 1027 } 1028 } else { 1029 nz = 0; 1030 val = mumps->val; 1031 if (!diagDense) { 1032 for (i = 0; i < M; i++) { 1033 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1034 for (j = ai[i]; j < ai[i + 1]; j++) { 1035 if (aj[j] < i) continue; 1036 val[nz++] = av[j]; 1037 } 1038 } else { 1039 rnz = ai[i + 1] - adiag[i]; 1040 v1 = av + adiag[i]; 1041 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1042 } 1043 } 1044 } else { 1045 for (i = 0; i < M; i++) { 1046 rnz = ai[i + 1] - adiag[i]; 1047 v1 = av + adiag[i]; 1048 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1049 } 1050 } 1051 } 1052 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1057 { 1058 const PetscInt *ai, *aj, *bi, *bj, *garray, 
*ajj, *bjj; 1059 PetscInt bs; 1060 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1061 PetscMUMPSInt *row, *col; 1062 const PetscScalar *av, *bv, *v1, *v2; 1063 PetscScalar *val; 1064 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1065 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1066 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1067 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 1068 #if defined(PETSC_USE_COMPLEX) 1069 PetscBool hermitian, isset; 1070 #endif 1071 1072 PetscFunctionBegin; 1073 #if defined(PETSC_USE_COMPLEX) 1074 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1075 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1076 #endif 1077 PetscCall(MatGetBlockSize(A, &bs)); 1078 rstart = A->rmap->rstart; 1079 ai = aa->i; 1080 aj = aa->j; 1081 bi = bb->i; 1082 bj = bb->j; 1083 av = aa->a; 1084 bv = bb->a; 1085 1086 garray = mat->garray; 1087 1088 if (reuse == MAT_INITIAL_MATRIX) { 1089 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 1090 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1091 PetscCall(PetscMalloc1(nz, &val)); 1092 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1093 mumps->irn = row; 1094 mumps->jcn = col; 1095 mumps->val = mumps->val_alloc = val; 1096 } else { 1097 val = mumps->val; 1098 } 1099 1100 jj = 0; 1101 irow = rstart; 1102 for (i = 0; i < mbs; i++) { 1103 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1104 countA = ai[i + 1] - ai[i]; 1105 countB = bi[i + 1] - bi[i]; 1106 bjj = bj + bi[i]; 1107 v1 = av + ai[i] * bs2; 1108 v2 = bv + bi[i] * bs2; 1109 1110 if (bs > 1) { 1111 /* A-part */ 1112 for (j = 0; j < countA; j++) { 1113 for (k = 0; k < bs; k++) { 1114 for (m = 0; m < bs; m++) { 1115 if (rstart + ajj[j] * bs > irow || k >= m) { 1116 if (reuse == MAT_INITIAL_MATRIX) { 1117 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1118 
PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 1119 } 1120 val[jj++] = v1[j * bs2 + m + k * bs]; 1121 } 1122 } 1123 } 1124 } 1125 1126 /* B-part */ 1127 for (j = 0; j < countB; j++) { 1128 for (k = 0; k < bs; k++) { 1129 for (m = 0; m < bs; m++) { 1130 if (reuse == MAT_INITIAL_MATRIX) { 1131 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1132 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 1133 } 1134 val[jj++] = v2[j * bs2 + m + k * bs]; 1135 } 1136 } 1137 } 1138 } else { 1139 /* A-part */ 1140 for (j = 0; j < countA; j++) { 1141 if (reuse == MAT_INITIAL_MATRIX) { 1142 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1143 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1144 } 1145 val[jj++] = v1[j]; 1146 } 1147 1148 /* B-part */ 1149 for (j = 0; j < countB; j++) { 1150 if (reuse == MAT_INITIAL_MATRIX) { 1151 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1152 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1153 } 1154 val[jj++] = v2[j]; 1155 } 1156 } 1157 irow += bs; 1158 } 1159 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 } 1162 1163 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1164 { 1165 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1166 PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 1167 PetscMUMPSInt *row, *col; 1168 const PetscScalar *av, *bv, *v1, *v2; 1169 PetscScalar *val; 1170 Mat Ad, Ao; 1171 Mat_SeqAIJ *aa; 1172 Mat_SeqAIJ *bb; 1173 1174 PetscFunctionBegin; 1175 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1176 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1177 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1178 1179 aa = (Mat_SeqAIJ *)Ad->data; 1180 bb = (Mat_SeqAIJ *)Ao->data; 1181 ai = aa->i; 1182 aj = aa->j; 1183 bi = bb->i; 1184 bj = bb->j; 1185 1186 rstart = A->rmap->rstart; 1187 
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MPIBAIJ -> COO triples with global indices; every bs x bs block of A and B is expanded
   into bs2 scalar entries, B columns mapped through garray */
static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2    = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MPIAIJ -> upper-triangular COO triples (for Cholesky); A part starts at each row's diagonal
   marker, B part keeps only columns with global index > rstart (all of which are >= row) */
static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
defined(PETSC_USE_COMPLEX) 1316 PetscBool hermitian, isset; 1317 #endif 1318 1319 PetscFunctionBegin; 1320 #if defined(PETSC_USE_COMPLEX) 1321 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1322 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1323 #endif 1324 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1325 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1326 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1327 1328 aa = (Mat_SeqAIJ *)Ad->data; 1329 bb = (Mat_SeqAIJ *)Ao->data; 1330 ai = aa->i; 1331 aj = aa->j; 1332 bi = bb->i; 1333 bj = bb->j; 1334 PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL)); 1335 rstart = A->rmap->rstart; 1336 1337 if (reuse == MAT_INITIAL_MATRIX) { 1338 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1339 nzb = 0; /* num of upper triangular entries in mat->B */ 1340 for (i = 0; i < m; i++) { 1341 nza += (ai[i + 1] - adiag[i]); 1342 countB = bi[i + 1] - bi[i]; 1343 bjj = bj + bi[i]; 1344 for (j = 0; j < countB; j++) { 1345 if (garray[bjj[j]] > rstart) nzb++; 1346 } 1347 } 1348 1349 nz = nza + nzb; /* total nz of upper triangular part of mat */ 1350 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1351 PetscCall(PetscMalloc1(nz, &val)); 1352 mumps->nnz = nz; 1353 mumps->irn = row; 1354 mumps->jcn = col; 1355 mumps->val = mumps->val_alloc = val; 1356 } else { 1357 val = mumps->val; 1358 } 1359 1360 jj = 0; 1361 irow = rstart; 1362 for (i = 0; i < m; i++) { 1363 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 1364 v1 = av + adiag[i]; 1365 countA = ai[i + 1] - adiag[i]; 1366 countB = bi[i + 1] - bi[i]; 1367 bjj = bj + bi[i]; 1368 v2 = bv + bi[i]; 1369 1370 /* A-part */ 1371 for (j = 0; j < countA; j++) { 1372 if (reuse == MAT_INITIAL_MATRIX) { 1373 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1374 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1375 } 
1376 val[jj++] = v1[j]; 1377 } 1378 1379 /* B-part */ 1380 for (j = 0; j < countB; j++) { 1381 if (garray[bjj[j]] > rstart) { 1382 if (reuse == MAT_INITIAL_MATRIX) { 1383 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1384 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1385 } 1386 val[jj++] = v2[j]; 1387 } 1388 } 1389 irow++; 1390 } 1391 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1392 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1393 PetscFunctionReturn(PETSC_SUCCESS); 1394 } 1395 1396 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1397 { 1398 const PetscScalar *av; 1399 const PetscInt M = A->rmap->n; 1400 PetscCount i; 1401 PetscMUMPSInt *row, *col; 1402 Vec v; 1403 1404 PetscFunctionBegin; 1405 PetscCall(MatDiagonalGetDiagonal(A, &v)); 1406 PetscCall(VecGetArrayRead(v, &av)); 1407 if (reuse == MAT_INITIAL_MATRIX) { 1408 PetscCall(PetscMalloc2(M, &row, M, &col)); 1409 for (i = 0; i < M; i++) { 1410 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1411 col[i] = row[i]; 1412 } 1413 mumps->val = (PetscScalar *)av; 1414 mumps->irn = row; 1415 mumps->jcn = col; 1416 mumps->nnz = M; 1417 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1418 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1419 PetscCall(VecRestoreArrayRead(v, &av)); 1420 PetscFunctionReturn(PETSC_SUCCESS); 1421 } 1422 1423 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1424 { 1425 PetscScalar *v; 1426 const PetscInt m = A->rmap->n, N = A->cmap->N; 1427 PetscInt lda; 1428 PetscCount i, j; 1429 PetscMUMPSInt *row, *col; 1430 1431 PetscFunctionBegin; 1432 
PetscCall(MatDenseGetArray(A, &v)); 1433 PetscCall(MatDenseGetLDA(A, &lda)); 1434 if (reuse == MAT_INITIAL_MATRIX) { 1435 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1436 for (i = 0; i < m; i++) { 1437 col[i] = 0; 1438 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1439 } 1440 for (j = 1; j < N; j++) { 1441 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1442 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1443 } 1444 if (lda == m) mumps->val = v; 1445 else { 1446 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1447 mumps->val_alloc = mumps->val; 1448 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1449 } 1450 mumps->irn = row; 1451 mumps->jcn = col; 1452 mumps->nnz = m * N; 1453 } else { 1454 if (lda == m && !mumps->nest_vals) mumps->val = v; 1455 else { 1456 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1457 } 1458 } 1459 PetscCall(MatDenseRestoreArray(A, &v)); 1460 PetscFunctionReturn(PETSC_SUCCESS); 1461 } 1462 1463 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 1464 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 1465 // and its rows and columns permuted 1466 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 1467 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 1468 { 1469 Mat A; 1470 PetscScalar s[2]; 1471 PetscBool isTrans, isHTrans, compare; 1472 1473 PetscFunctionBegin; 1474 do { 1475 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1476 if (isTrans) { 1477 PetscCall(MatTransposeGetMat(*sub, &A)); 1478 isHTrans = 
    } else {
      PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
      if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
    }
    compare = (PetscBool)(isTrans || isHTrans);
    if (compare) {
      if (vshift && vscale) {
        PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
        /* accumulate the shell's (shift, scale), conjugated if an odd number of Hermitian transposes was crossed */
        if (!*conjugate) {
          *vshift += s[0] * *vscale;
          *vscale *= s[1];
        } else {
          *vshift += PetscConj(s[0]) * *vscale;
          *vscale *= PetscConj(s[1]);
        }
      }
      if (swap) *swap = (PetscBool)!*swap;
      if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
      *sub = A;
    }
  } while (compare);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MATNEST -> one global COO triple array: each sub-block is converted with its own
   MatConvertToTriples_* routine into scratch arrays, its row/column indices are then mapped to
   the nest's global ordering (columns via a PetscSF over the sub-block's column layout), and
   the values are appended to a single concatenated (irn, jcn, vals) set. For Cholesky (chol)
   only blocks with c >= r are kept. On MAT_REUSE_MATRIX only the values are refreshed. */
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w, Mbs = 0;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    /* first pass: pick a conversion routine per block and count nonzeros */
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              /* diagonal blocks feed the SBAIJ (upper-triangular) converters */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    /* second pass: convert each block and splice it into the global COO arrays */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst, size, bs;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (c == r) { /* accumulate the global number of block rows Mbs for ICNTL(15) below */
          PetscCall(ISGetSize(rows[r], &size));
          if (!mumps->nest_convert_to_triples[r * nc + c]) {
            /* NOTE: this inner c intentionally shadows the loop variable */
            for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
          }
          PetscCall(MatGetBlockSize(sub, &bs));
          Mbs += size / bs;
        }
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (mumps->id.ICNTL(15) == 1) { /* block-format input: build blkptr on rank 0 from the nest's diagonal block sizes */
      if (Mbs != A->rmap->N) {
        PetscMPIInt rank, size;

        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        if (rank == 0) {
          PetscInt shift = 0; /* NOTE: intentionally shadows the function parameter; counts emitted block rows here */

          PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
          PetscCall(PetscFree(mumps->id.blkptr));
          PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
          mumps->id.blkptr[0] = 1;
          for (PetscInt i = 0; i < size; ++i) {
            for (PetscInt r = 0; r < nr; r++) {
              Mat             sub = mats[r][r];
              const PetscInt *ranges;
              PetscInt        bs;

              for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
              PetscCall(MatGetOwnershipRanges(sub, &ranges));
              PetscCall(MatGetBlockSize(sub, &bs));
              for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
              shift += (ranges[i + 1] - ranges[i]) / bs;
            }
          }
        }
      } else mumps->id.ICNTL(15) = 0; /* every block row is a single row: plain format is enough */
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* MAT_REUSE_MATRIX: refresh the values of each block in place within nest_vals */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
PetscCall(VecDestroy(&mumps->b_seq)); 1760 PetscCall(VecDestroy(&mumps->x_seq)); 1761 PetscCall(PetscFree(mumps->id.perm_in)); 1762 PetscCall(PetscFree(mumps->id.blkvar)); 1763 PetscCall(PetscFree(mumps->id.blkptr)); 1764 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1765 PetscCall(PetscFree(mumps->val_alloc)); 1766 PetscCall(PetscFree(mumps->info)); 1767 PetscCall(PetscFree(mumps->ICNTL_pre)); 1768 PetscCall(PetscFree(mumps->CNTL_pre)); 1769 PetscCall(MatMumpsResetSchur_Private(mumps)); 1770 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1771 mumps->id.job = JOB_END; 1772 PetscMUMPS_c(mumps); 1773 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1774 if (mumps->mumps_comm != MPI_COMM_NULL) { 1775 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1776 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1777 } 1778 } 1779 PetscCall(MatMumpsFreeInternalID(&mumps->id)); 1780 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1781 if (mumps->use_petsc_omp_support) { 1782 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1783 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1784 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1785 } 1786 #endif 1787 PetscCall(PetscFree(mumps->ia_alloc)); 1788 PetscCall(PetscFree(mumps->ja_alloc)); 1789 PetscCall(PetscFree(mumps->recvcount)); 1790 PetscCall(PetscFree(mumps->reqs)); 1791 PetscCall(PetscFree(mumps->irhs_loc)); 1792 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1793 PetscCall(PetscFree(mumps->nest_vals)); 1794 PetscCall(PetscFree(A->data)); 1795 1796 /* clear composed functions */ 1797 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1798 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1799 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1800 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1801 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1802 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1803 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1804 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1805 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1806 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1807 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1808 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1809 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1810 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1811 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL)); 1812 PetscFunctionReturn(PETSC_SUCCESS); 1813 } 1814 1815 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
 */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OMP aggregation: each rank passes its own rows of the RHS directly to MUMPS */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* OMP aggregation: gather the RHS pieces of all ranks in the omp communicator onto the omp master */
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst                    = dstbase;
          for (i = 0; i < nrhs; i++) {
            /* transpose from per-rank contiguous columns to the column-major layout MUMPS expects */
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A x = b using the MUMPS factors held in A.
   Handles: distributed (ICNTL(20)=10) vs centralized (ICNTL(20)=0) dense RHS,
   the uniprocessor fast path, optional Schur-complement condensation/expansion
   driven by ICNTL(26), and scattering MUMPS' distributed solution (isol_loc/sol_loc)
   back into the PETSc vector x. */
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *barray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    /* factorization previously failed: x was flagged above, nothing to solve */
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
      PetscCall(VecGetArrayRead(b, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
      }
    }
  } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
    mumps->id.ICNTL(20) = 0;
    PetscCall(VecCopy(b, x)); /* MUMPS overwrites the RHS with the solution, so work in x */
    PetscCall(VecGetArray(x, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
      mumps->id.ICNTL(26) = 1; /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }

  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
    /* solve the Schur system, then copy the redrhs entries back into the full solution array */
    PetscCall(MatMumpsSolveSchur_Private(A));
    for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                   /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscScalar *xarray;
    PetscCall(VecGetArray(mumps->x_seq, &xarray));
    /* convert sol_loc from the (possibly mixed) MUMPS precision into PetscScalar */
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
    PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

    if (mumps->ICNTL20 == 10) { // distributed RHS
      PetscCall(VecRestoreArrayRead(b, &barray));
    } else if (!mumps->myid) { // centralized RHS
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else {
    // id.rhs has the solution in mumps precision
    PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(VecRestoreArray(x, &array));
  }

  /* INFO(28) is the number of entries in the factors; negative means the count is in millions */
  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A^T x = b by toggling MUMPS ICNTL(9) (0 selects the transposed system) and
   delegating to MatSolve_MUMPS(); the previous ICNTL(9) value is restored afterwards. */
static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solve A X = B for multiple right-hand sides. X must be dense; B is either dense or
   a MATTRANSPOSEVIRTUAL wrapper around a sparse matrix (MUMPS wants compressed-column
   sparse RHS, see MatMatTransposeSolve_MUMPS()). */
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *barray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *sol_loc;
  void              *sol_loc_save;
  PetscCount         sol_loc_len_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    /* reject any scaling/shift attached to the shell wrapper: the sparse data is used verbatim */
    PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    /* input B is transpose of actual RHS matrix,
       because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
    PetscCall(MatTransposeGetMat(B, &Bt));
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); /* overflow-checked total number of RHS entries */
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  mumps->id.lrhs = (PetscMUMPSInt)M;

  if (mumps->petsc_size == 1) { // handle this easy case specially and return early
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    if (denseB) {
      /* copy B to X (MUMPS overwrites the centralized RHS with the solution) */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(PetscArraycpy(array, barray, nrhsM));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      /* shift of 1: MUMPS wants 1-based (Fortran) CSR indices */
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
        mumps->id.ICNTL(26) = 1; /* condensation phase, i.e, to solve id.redrhs */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }

    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
      /* solve the Schur system, then copy redrhs back into each column of the solution */
      PetscCall(MatMumpsSolveSchur_Private(A));
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
    }

    if (!denseB) { /* sparse B, restore ia, ja */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }

    // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
    PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
  sol_loc_save          = mumps->id.sol_loc;
  sol_loc_len_save      = mumps->id.sol_loc_len;
  mumps->id.isol_loc    = NULL; // an init state
  mumps->id.sol_loc     = NULL;
  mumps->id.sol_loc_len = 0;

  lsol_loc = mumps->id.lsol_loc;
  PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation. */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));

      /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhsM, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
      } else {
        /* non-host ranks contribute nothing to the centralized RHS */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }

      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArrayRead(b_seq, &barray));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
        PetscCall(VecRestoreArrayRead(b_seq, &barray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArrayRead(X, &barray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
    PetscCall(MatDenseRestoreArrayRead(X, &barray));

    if (!mumps->myid) {
      /* only the host passes the sparse RHS (diagonal block of the MPIAIJ transpose) to MUMPS */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      if (mumps->id.rhs_sparse_len) {
        PetscCall(PetscFree(mumps->id.rhs_sparse));
        mumps->id.rhs_sparse_len = 0;
      }
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to PETSc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate the owning rank of global row isol_loc[i] to compute its position inside X's local column-major storage */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
    mumps->id.sol_loc_len = 0;
    PetscCall(PetscFree(mumps->id.sol_loc));
  }

  // restore old values
  mumps->id.sol_loc     = sol_loc_save;
  mumps->id.sol_loc_len = sol_loc_len_save;
  mumps->id.isol_loc    = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2297 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2298 } 2299 } else { 2300 if (mumps->ICNTL20 == 0) { 2301 PetscCall(VecDestroy(&b_seq)); 2302 PetscCall(VecScatterDestroy(&scat_rhs)); 2303 } 2304 } 2305 PetscCall(VecScatterDestroy(&scat_sol)); 2306 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2307 PetscFunctionReturn(PETSC_SUCCESS); 2308 } 2309 2310 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 2311 { 2312 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2313 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2314 2315 PetscFunctionBegin; 2316 mumps->id.ICNTL(9) = 0; 2317 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2318 mumps->id.ICNTL(9) = value; 2319 PetscFunctionReturn(PETSC_SUCCESS); 2320 } 2321 2322 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 2323 { 2324 PetscBool flg; 2325 Mat B; 2326 2327 PetscFunctionBegin; 2328 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2329 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 2330 2331 /* Create B=Bt^T that uses Bt's data structure */ 2332 PetscCall(MatCreateTranspose(Bt, &B)); 2333 2334 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2335 PetscCall(MatDestroy(&B)); 2336 PetscFunctionReturn(PETSC_SUCCESS); 2337 } 2338 2339 #if !defined(PETSC_USE_COMPLEX) 2340 /* 2341 input: 2342 F: numeric factor 2343 output: 2344 nneg: total number of negative pivots 2345 nzero: total number of zero pivots 2346 npos: (global dimension of F) - nneg - nzero 2347 */ 2348 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 2349 { 2350 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2351 PetscMPIInt size; 2352 2353 PetscFunctionBegin; 2354 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 2355 /* MUMPS 4.3.1 calls 
ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 2356 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 2357 2358 if (nneg) *nneg = mumps->id.INFOG(12); 2359 if (nzero || npos) { 2360 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2361 if (nzero) *nzero = mumps->id.INFOG(28); 2362 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 2363 } 2364 PetscFunctionReturn(PETSC_SUCCESS); 2365 } 2366 #endif 2367 2368 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 2369 { 2370 PetscMPIInt nreqs; 2371 PetscMUMPSInt *irn, *jcn; 2372 PetscMPIInt count; 2373 PetscCount totnnz, remain; 2374 const PetscInt osize = mumps->omp_comm_size; 2375 PetscScalar *val; 2376 2377 PetscFunctionBegin; 2378 if (osize > 1) { 2379 if (reuse == MAT_INITIAL_MATRIX) { 2380 /* master first gathers counts of nonzeros to receive */ 2381 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 2382 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 2383 2384 /* Then each computes number of send/recvs */ 2385 if (mumps->is_omp_master) { 2386 /* Start from 1 since self communication is not done in MPI */ 2387 nreqs = 0; 2388 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 2389 } else { 2390 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 2391 } 2392 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 2393 2394 /* The following code is doing a very 
simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication: post receives for each rank's triples, sliced at PETSC_MPI_INT_MAX entries per message */
        for (PetscMPIInt i = 1; i < osize; i++) {
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master ranks send their local triples to rank 0, with the same slicing */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: the pattern (irn/jcn) is already on the master; only values move */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for
above send/recvs to share one mpi tag */ 2484 } 2485 PetscFunctionReturn(PETSC_SUCCESS); 2486 } 2487 2488 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2489 { 2490 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2491 2492 PetscFunctionBegin; 2493 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 2494 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2495 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2496 PetscFunctionReturn(PETSC_SUCCESS); 2497 } 2498 2499 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2500 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2501 2502 /* numerical factorization phase */ 2503 mumps->id.job = JOB_FACTNUMERIC; 2504 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2505 if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2506 } else { 2507 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2508 } 2509 2510 if (F->schur) { 2511 const PetscScalar *array; 2512 MUMPS_INT size = mumps->id.size_schur; 2513 PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2514 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2515 PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2516 } 2517 2518 PetscMUMPS_c(mumps); 2519 if (mumps->id.INFOG(1) < 0) { 2520 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 
2521 if (mumps->id.INFOG(1) == -10) { 2522 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2523 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2524 } else if (mumps->id.INFOG(1) == -13) { 2525 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2526 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2527 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2528 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2529 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2530 } else { 2531 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2532 F->factorerrortype = MAT_FACTOR_OTHER; 2533 } 2534 } 2535 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2536 2537 F->assembled = PETSC_TRUE; 2538 2539 if (F->schur) { /* reset Schur status to unfactored */ 2540 #if defined(PETSC_HAVE_CUDA) 2541 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2542 #endif 2543 PetscScalar *array; 2544 PetscCall(MatDenseGetArray(F->schur, &array)); 2545 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2546 PetscCall(MatDenseRestoreArray(F->schur, &array)); 2547 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2548 mumps->id.ICNTL(19) = 2; 2549 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2550 } 2551 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2552 } 2553 2554 /* 
just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2555 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2556 2557 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2558 // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2559 // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2560 // which the solution (in SOL_loc) is available on the local processor. 2561 // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2562 // normally have the same contents for each of these calls. The only exception is the case of 2563 // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2564 // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 2565 if (mumps->petsc_size > 1) { 2566 PetscInt lsol_loc; 2567 PetscScalar *array; 2568 2569 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2570 if (mumps->x_seq) { 2571 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2572 PetscCall(PetscFree(mumps->id.isol_loc)); 2573 PetscCall(VecDestroy(&mumps->x_seq)); 2574 } 2575 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2576 PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2577 PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2578 PetscCall(VecGetArray(mumps->x_seq, &array)); 2579 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2580 PetscCall(VecRestoreArray(mumps->x_seq, &array)); 2581 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2582 } 2583 PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 /* Sets MUMPS options from the options database */ 2588 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, 
Mat A)
{
  Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
  PetscReal     cntl;
  PetscMUMPSInt icntl = 0, size, *listvar_schur;
  PetscInt      info[80], i, ninfo = 80, rbs, cbs;
  PetscBool     flg = PETSC_FALSE;
  /* before the first call mumps->id.icntl is not yet allocated; the cached mumps->ICNTL26 carries the Schur setting */
  PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
  void         *arr;

  PetscFunctionBegin;
  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
  if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
    PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
    PetscInt       nthreads = 0;
    PetscInt       nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;   /* cached (index,value) pairs set before init; slot 0 is the count */
    PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
    PetscMUMPSInt  nblk, *blkvar, *blkptr;

    mumps->petsc_comm = PetscObjectComm((PetscObject)A);
    PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
    PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */

    PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
    if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
    /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
    PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
    if (mumps->use_petsc_omp_support) {
      PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
      PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
      PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
#else
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
              ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
#endif
    } else {
      mumps->omp_comm      = PETSC_COMM_SELF;
      mumps->mumps_comm    = mumps->petsc_comm;
      mumps->is_omp_master = PETSC_TRUE;
    }
    PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
    mumps->reqs = NULL;
    mumps->tag  = 0;

    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
        /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
        MPI_Comm comm;
        PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
        mumps->mumps_comm = comm;
      } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
    }

    mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
    mumps->id.job          = JOB_INIT;
    mumps->id.par          = 1; /* host participates factorizaton and solve */
    mumps->id.sym          = mumps->sym;

    /* stash Schur/block settings around JOB_INIT; they are written back into mumps->id further below */
    size          = mumps->id.size_schur;
    arr           = mumps->id.schur;
    listvar_schur = mumps->id.listvar_schur;
    nblk          = mumps->id.nblk;
    blkvar        = mumps->id.blkvar;
    blkptr        = mumps->id.blkptr;
    if (PetscDefined(USE_DEBUG)) {
      /* listvar_schur holds 1-based indices; validate they fall inside the global row range */
      for (PetscInt i = 0; i < size; i++)
        PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, A->rmap->N);
    }

    PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
    PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
    PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar");
    PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));

    PetscMUMPS_c(mumps); /* run JOB_INIT */
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

    /* set PETSc-MUMPS default options - override MUMPS default */
    mumps->id.ICNTL(3) = 0;
    mumps->id.ICNTL(4) = 0;
    if (mumps->petsc_size == 1) {
      mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
      mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
    } else {
      mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
      mumps->id.ICNTL(21) = 1; /* distributed solution */
    }
    if (nblk && blkptr) { /* restore user-provided block structure saved above */
      mumps->id.ICNTL(15) = 1;
      mumps->id.nblk      = nblk;
      mumps->id.blkvar    = blkvar;
      mumps->id.blkptr    = blkptr;
    } else mumps->id.ICNTL(15) = 0;

    /* restore cached ICNTL and CNTL values */
    for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
    for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);

    PetscCall(PetscFree(mumps->ICNTL_pre));
    PetscCall(PetscFree(mumps->CNTL_pre));

    if (schur) { /* re-install the Schur data saved before JOB_INIT */
      mumps->id.size_schur    = size;
      mumps->id.schur_lld     = size;
      mumps->id.schur         = arr;
      mumps->id.listvar_schur = listvar_schur;
      if (mumps->petsc_size > 1) {
        PetscBool gs; /* gs is false if any rank other than root has non-empty IS */

        mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
        gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
        PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
        PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
      } else {
        if (F->factortype == MAT_FACTOR_LU) {
          mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
        } else {
          mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
        }
      }
      mumps->id.ICNTL(26) = -1;
    }

    /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
       For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2715 */ 2716 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2717 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2718 2719 mumps->scat_rhs = NULL; 2720 mumps->scat_sol = NULL; 2721 } 2722 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2723 if (flg) mumps->id.ICNTL(1) = icntl; 2724 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2725 if (flg) mumps->id.ICNTL(2) = icntl; 2726 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2727 if (flg) mumps->id.ICNTL(3) = icntl; 2728 2729 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2730 if (flg) mumps->id.ICNTL(4) = icntl; 2731 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2732 2733 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2734 if (flg) mumps->id.ICNTL(6) = icntl; 2735 2736 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2737 if (flg) { 2738 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2739 mumps->id.ICNTL(7) = icntl; 2740 } 2741 2742 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2743 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2744 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2745 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2746 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2747 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2748 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2749 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2750 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2751 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2752 if (flg) { 2753 if (mumps->id.ICNTL(15) < 0) 
PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2754 else if (mumps->id.ICNTL(15) > 0) { 2755 const PetscInt *bsizes; 2756 PetscInt nblocks, p, *blkptr = NULL; 2757 PetscMPIInt *recvcounts, *displs, n; 2758 PetscMPIInt rank, size = 0; 2759 2760 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2761 flg = PETSC_TRUE; 2762 for (p = 0; p < nblocks; ++p) { 2763 if (bsizes[p] > 1) break; 2764 } 2765 if (p == nblocks) flg = PETSC_FALSE; 2766 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2767 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2768 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2769 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2770 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2771 PetscCall(PetscMPIIntCast(nblocks, &n)); 2772 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2773 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2774 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2775 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2776 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2777 if (rank == 0) { 2778 blkptr[0] = 1; 2779 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2780 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2781 } 2782 PetscCall(PetscFree2(recvcounts, displs)); 2783 PetscCall(PetscFree(blkptr)); 2784 } 2785 } 2786 } 2787 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), 
&mumps->id.ICNTL(19), NULL)); 2788 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2789 PetscCall(MatDestroy(&F->schur)); 2790 PetscCall(MatMumpsResetSchur_Private(mumps)); 2791 } 2792 2793 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2794 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2795 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2796 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2797 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2798 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2799 */ 2800 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2801 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2802 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2803 #endif 2804 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2805 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 2806 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2807 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2808 #endif 2809 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2810 2811 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2812 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2813 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2814 if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2815 2816 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2817 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2818 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2819 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2820 
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2821 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2822 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2823 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2824 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2825 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2826 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2827 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2828 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2829 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2830 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", 
mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL)); 2831 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2832 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2833 2834 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2835 if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2836 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2837 if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2838 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2839 if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2840 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2841 if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2842 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2843 if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2844 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2845 if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2846 2847 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2848 2849 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2850 if (ninfo) { 2851 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, 
PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2852 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2853 mumps->ninfo = ninfo; 2854 for (i = 0; i < ninfo; i++) { 2855 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2856 mumps->info[i] = info[i]; 2857 } 2858 } 2859 PetscOptionsEnd(); 2860 PetscFunctionReturn(PETSC_SUCCESS); 2861 } 2862 2863 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2864 { 2865 PetscFunctionBegin; 2866 if (mumps->id.INFOG(1) < 0) { 2867 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2868 if (mumps->id.INFOG(1) == -6) { 2869 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2870 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2871 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2872 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2873 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2874 } else { 2875 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2876 F->factorerrortype = MAT_FACTOR_OTHER; 2877 } 2878 } 2879 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2880 PetscFunctionReturn(PETSC_SUCCESS); 2881 } 2882 2883 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2884 { 2885 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2886 Vec b; 2887 const PetscInt M = A->rmap->N; 2888 2889 PetscFunctionBegin; 2890 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2891 /* F is assembled by a previous call of 
MatLUFactorSymbolic_AIJMUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  /* convert A into the triplet (COO) arrays mumps->irn/jcn/val handed to MUMPS below */
  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) { /* only the host rank supplies the assembled matrix */
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* for ICNTL(6) in 2..6 MUMPS needs numerical values already during analysis (permutation/scaling variants) */
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
      if (r && mumps->id.ICNTL(7) == 7) { /* user row permutation r overrides the default automatic ordering */
        mumps->id.ICNTL(7) = 1; /* 1 = ordering supplied by the user in perm_in */
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt        i;

          PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
          PetscCall(ISGetIndices(r, &idx));
          for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
          PetscCall(ISRestoreIndices(r, &idx));
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  /* install the MUMPS implementations on the factored matrix */
  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the PETSc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the PETSc r permutation and factor info are ignored */
static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* symmetric factorization: solving with A^T is the same as with A */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Print MUMPS control parameters (ICNTL/CNTL) and run statistics (INFO/INFOG/RINFO/RINFOG)
   for -ksp_view style ASCII viewers */
static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
/* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
        /* the error-analysis statistics below are only meaningful when ICNTL(11) was enabled */
        if (mumps->id.ICNTL(11) > 0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(15-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

        /* information local to each processor: use synchronized printing so ranks emit in order */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        /* extra INFO entries the user requested via -mat_mumps_view_info */
        if (mumps->ninfo && mumps->ninfo <= 80) {
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report basic factorization statistics. Only the factor entry count is available from MUMPS
   (INFOG(20)); a negative INFOG(20) is in units of millions, hence the -1000000 scaling. */
static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Register with MUMPS the index set `is` that defines the Schur complement block:
   creates the sequential dense matrix F->schur and fills id.listvar_schur with
   1-based indices. The MUMPS-side schur[] array itself is allocated later. */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  // don't allocate mumps->id.schur[] now as its precision is not yet known
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
  else mumps->ICNTL26 = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the MUMPS-computed Schur complement into a new sequential dense matrix *S.
   Depending on ICNTL(19) MUMPS returns the block full or lower-triangular, by rows
   or by columns; transpose/symmetrize into PETSc's column-major dense storage. */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt     i, j, N = mumps->id.size_schur;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) { /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else { /* stored by columns */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else if (mumps->id.ICNTL(19)
== 3) { /* full matrix */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Backend for MatMumpsSetIcntl(). Before the first PetscMUMPS_c() call (id.job == JOB_NULL) the
   MUMPS structure is not initialized, so (icntl, ival) pairs are cached in the growable array
   ICNTL_pre with layout [count, icntl0, ival0, icntl1, ival1, ...] and applied later; once MUMPS
   is initialized the value is written directly into id.ICNTL(icntl). */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) { /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Backend for MatMumpsGetIcntl(); reads from the ICNTL_pre cache while MUMPS is uninitialized. */
static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
    *ival = 0; /* default when the requested ICNTL was never cached */
    for (i = 0; i < nICNTL_pre; ++i) {
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
    }
  } else *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `ICNTL()`
- ival - value of MUMPS `ICNTL(icntl)`

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
. ival - value of MUMPS ICNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(ival, 3);
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Backend for MatMumpsSetCntl() */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ?
mumps->CNTL_pre[0] : 0; 3414 for (i = 0; i < nCNTL_pre; ++i) 3415 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3416 if (i == nCNTL_pre) { 3417 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3418 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3419 mumps->CNTL_pre[0]++; 3420 } 3421 mumps->CNTL_pre[1 + 2 * i] = icntl; 3422 mumps->CNTL_pre[2 + 2 * i] = val; 3423 } else ID_CNTL_SET(mumps->id, icntl, val); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 3427 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3428 { 3429 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3430 3431 PetscFunctionBegin; 3432 if (mumps->id.job == JOB_NULL) { 3433 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3434 *val = 0.0; 3435 for (i = 0; i < nCNTL_pre; ++i) { 3436 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 3437 } 3438 } else *val = ID_CNTL_GET(mumps->id, icntl); 3439 PetscFunctionReturn(PETSC_SUCCESS); 3440 } 3441 3442 /*@ 3443 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3444 3445 Logically Collective 3446 3447 Input Parameters: 3448 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3449 . icntl - index of MUMPS parameter array `CNTL()` 3450 - val - value of MUMPS `CNTL(icntl)` 3451 3452 Options Database Key: 3453 . 
-mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3454 3455 Level: beginner 3456 3457 Note: 3458 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3459 3460 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3461 @*/ 3462 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3463 { 3464 PetscFunctionBegin; 3465 PetscValidType(F, 1); 3466 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3467 PetscValidLogicalCollectiveInt(F, icntl, 2); 3468 PetscValidLogicalCollectiveReal(F, val, 3); 3469 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3470 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3471 PetscFunctionReturn(PETSC_SUCCESS); 3472 } 3473 3474 /*@ 3475 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3476 3477 Logically Collective 3478 3479 Input Parameters: 3480 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3481 - icntl - index of MUMPS parameter array CNTL() 3482 3483 Output Parameter: 3484 . 
val - value of MUMPS CNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read local statistics INFO(icntl) from the MUMPS structure */
static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read global statistics INFOG(icntl) from the MUMPS structure */
static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read local real statistics RINFO(icntl) from the MUMPS structure */
static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = ID_RINFO_GET(mumps->id, icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read global real statistics RINFOG(icntl) from the MUMPS structure */
static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = ID_RINFOG_GET(mumps->id, icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the 0-based indices of null-pivot rows detected during factorization
   (requires ICNTL(24) == 1). The list is only available on rank 0; other ranks
   get *size = 0, *array = NULL. Caller frees *array. */
static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
  *size  = 0;
  *array = NULL;
  if (!mumps->myid) {
    *size = mumps->id.INFOG(28);
    PetscCall(PetscMalloc1(*size, array));
    /* MUMPS reports 1-based indices; convert to PETSc's 0-based convention */
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute the entries of inv(A) requested by the sparsity pattern of spRHS
   (a MATTRANSPOSEVIRTUAL wrapper) via MUMPS' A^-1 feature (ICNTL(30)).
   The sequence below (set sparse RHS on rank 0, ICNTL(20)=1/ICNTL(21)=0, then
   JOB_SOLVE) follows the MUMPS users manual and is order-sensitive. */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
  PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatTransposeGetMat(spRHS, &Bt));

  PetscCall(MatMumpsSetIcntl(F, 30, 1)); /* ICNTL(30): compute selected entries of the inverse */

  if (mumps->petsc_size > 1) {
    /* NOTE(review): assumes Bt is MATMPIAIJ here; only its sequential diagonal part is used */
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
  mumps->id.rhs = NULL;

  if (!mumps->myid) { /* MUMPS expects the sparse RHS on the host rank only, in 1-based CSR */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
  } else { /* non-host ranks carry no RHS; release any stale buffer */
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    if (mumps->id.rhs_sparse_len) {
      PetscCall(PetscFree(mumps->id.rhs_sparse));
      mumps->id.rhs_sparse_len = 0;
    }
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) {
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

  Output Parameter:
.
spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3624 3625 Level: beginner 3626 3627 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3628 @*/ 3629 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3630 { 3631 PetscFunctionBegin; 3632 PetscValidType(F, 1); 3633 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3634 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3635 PetscFunctionReturn(PETSC_SUCCESS); 3636 } 3637 3638 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3639 { 3640 Mat spRHS; 3641 3642 PetscFunctionBegin; 3643 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3644 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3645 PetscCall(MatDestroy(&spRHS)); 3646 PetscFunctionReturn(PETSC_SUCCESS); 3647 } 3648 3649 /*@ 3650 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3651 3652 Logically Collective 3653 3654 Input Parameter: 3655 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3656 3657 Output Parameter: 3658 . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3659 3660 Level: beginner 3661 3662 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3663 @*/ 3664 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3665 { 3666 PetscBool flg; 3667 3668 PetscFunctionBegin; 3669 PetscValidType(F, 1); 3670 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3671 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3672 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3673 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3674 PetscFunctionReturn(PETSC_SUCCESS); 3675 } 3676 3677 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3678 { 3679 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3680 3681 PetscFunctionBegin; 3682 if (nblk) { 3683 PetscAssertPointer(blkptr, 4); 3684 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3685 PetscCall(PetscFree(mumps->id.blkptr)); 3686 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3687 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3688 // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3689 // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 
3690 // mumps->id.ICNTL(15) = 1; 3691 if (blkvar) { 3692 PetscCall(PetscFree(mumps->id.blkvar)); 3693 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3694 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3695 } 3696 } else { 3697 PetscCall(PetscFree(mumps->id.blkptr)); 3698 PetscCall(PetscFree(mumps->id.blkvar)); 3699 // mumps->id.ICNTL(15) = 0; 3700 mumps->id.nblk = 0; 3701 } 3702 PetscFunctionReturn(PETSC_SUCCESS); 3703 } 3704 3705 /*@ 3706 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3707 3708 Not collective, only relevant on the first process of the MPI communicator 3709 3710 Input Parameters: 3711 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3712 . nblk - the number of blocks 3713 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3714 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3715 3716 Level: advanced 3717 3718 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3719 @*/ 3720 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3721 { 3722 PetscFunctionBegin; 3723 PetscValidType(F, 1); 3724 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3725 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3726 PetscFunctionReturn(PETSC_SUCCESS); 3727 } 3728 3729 /*@ 3730 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3731 3732 Logically Collective 3733 3734 Input Parameters: 3735 + F - the factored 
matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array INFO()

  Output Parameter:
. ival - value of MUMPS INFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* NOTE(review): unlike MatMumpsGetIcntl(), icntl is not checked with PetscValidLogicalCollectiveInt() here — confirm intended */
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
. ival - value of MUMPS INFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
. val - value of MUMPS RINFO(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
. val - value of MUMPS RINFOG(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`

  Output Parameters:
+ size - local size of the array. The size of the array is non-zero only on MPI rank 0
- array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
  for freeing this array.
  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>

  Works with `MATAIJ` and `MATSBAIJ` matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
  See details below.

  Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
.
-mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3881 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3882 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3883 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3884 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3885 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3886 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3887 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3888 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3889 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3890 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3891 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3892 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3893 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3894 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3895 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3896 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3897 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3898 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3899 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3900 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3901 . 
-mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3902 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3903 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3904 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3905 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3906 . -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase 3907 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3908 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3909 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3910 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3911 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3912 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3913 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3914 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3915 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3916 3917 Level: beginner 3918 3919 Notes: 3920 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3921 error if the matrix is Hermitian. 3922 3923 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3924 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 
3925 3926 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3927 the failure with 3928 .vb 3929 KSPGetPC(ksp,&pc); 3930 PCFactorGetMatrix(pc,&mat); 3931 MatMumpsGetInfo(mat,....); 3932 MatMumpsGetInfog(mat,....); etc. 3933 .ve 3934 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3935 3936 MUMPS provides 64-bit integer support in two build modes: 3937 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3938 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3939 3940 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3941 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3942 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3943 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3944 3945 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3946 3947 Two modes to run MUMPS/PETSc with OpenMP 3948 .vb 3949 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3950 threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test". 3951 .ve 3952 3953 .vb 3954 `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. 
For example,
    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
.ve

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
   (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
   by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue.
Still use the above example and suppose your compute node has two sockets, 3970 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3971 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3972 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3973 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3974 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3975 examine the mapping result. 3976 3977 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3978 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3979 calls `omp_set_num_threads`(m) internally before calling MUMPS. 

   See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`

.seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
M*/

/* Query callback composed on every MUMPS factor matrix: reports MATSOLVERMUMPS as the solver package */
static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI AIJ matrices (also handles MATDIAGONAL and dense types).
   Creates the empty factor matrix F of the same sizes as A, attaches the MUMPS context and
   the MatMumps* query/set callbacks, and selects the triple-conversion routine matching
   A's type and the requested factorization (LU or Cholesky). */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqAIJ, isDiag, isDense;
  PetscMPIInt size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS has no complex Hermitian (non-symmetric) Cholesky; signal "not supported" by returning F = NULL */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  /* Create the factorization matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/set interface so MatMumps*() API calls dispatch to this factor */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* Pick the routine that converts A into MUMPS triples (COO) format for this matrix type */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* Cholesky feeds MUMPS only the upper triangle, hence the *_sbaij converters */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* SYM=1 (SPD) is unavailable for complex; use SYM=2 (general symmetric) */
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices; only Cholesky applies since SBAIJ stores the upper triangle */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqSBAIJ;
  PetscMPIInt size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS has no complex Hermitian (non-symmetric) Cholesky; signal "not supported" by returning F = NULL */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/set interface so MatMumps*() API calls dispatch to this factor */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2; /* SYM=1 (SPD) is unavailable for complex; use SYM=2 (general symmetric) */
#else
  if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
  else mumps->sym = 2;
#endif

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI BAIJ matrices; LU only (Cholesky users should switch to SBAIJ/AIJ) */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqBAIJ;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));
  PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
  B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
  B->factortype            = MAT_FACTOR_LU;
  if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
  else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
  mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/set interface so MatMumps*() API calls dispatch to this factor */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI SELL matrices; currently only sequential SELL with LU is implemented */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqSELL;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/set interface; note the inverse/SetBlk callbacks are not offered for SELL */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

  PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
  B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
  B->factortype            = MAT_FACTOR_LU;
  PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
  mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
  mumps->sym              = 0; /* MUMPS SYM=0: unsymmetric */
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for MATNEST matrices.
   First validates that the nest layout is supported (square block structure with matching,
   non-intersecting row/column index sets for Cholesky; supported sub-matrix types for each block),
   returning *F = NULL without error when it is not; only then builds the factor matrix. */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS has no complex Hermitian (non-symmetric) Cholesky; signal "not supported" by returning F = NULL */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    /* Cholesky requires each diagonal block's row and column index sets to match exactly (order included) */
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* Reject row index sets whose [min,max] ranges overlap between different blocks */
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Check that every (relevant) sub-matrix has a type the nest-to-triples converter can handle */
  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      /* For Cholesky only the upper-triangular part (c >= r) of the nest is consulted */
      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          /* SBAIJ sub-matrices are only acceptable on the diagonal */
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/set interface so MatMumps*() API calls dispatch to this factor */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* SYM=1 (SPD) is unavailable for complex; use SYM=2 (general symmetric) */
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Registers the MATSOLVERMUMPS factor-creation routines for every supported matrix type/factor combination */
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  /* BAIJ Cholesky is registered so the user gets the informative error from MatGetFactor_baij_mumps */
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}