1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 #include <petsc/private/vecimpl.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15 #include <cmumps_c.h> 16 #include <zmumps_c.h> 17 #include <smumps_c.h> 18 #include <dmumps_c.h> 19 #else 20 #if defined(PETSC_USE_COMPLEX) 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <cmumps_c.h> 23 #define MUMPS_c cmumps_c 24 #define MUMPS_STRUC_C CMUMPS_STRUC_C 25 #define MumpsScalar CMUMPS_COMPLEX 26 #else 27 #include <zmumps_c.h> 28 #define MUMPS_c zmumps_c 29 #define MUMPS_STRUC_C ZMUMPS_STRUC_C 30 #define MumpsScalar ZMUMPS_COMPLEX 31 #endif 32 #else 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #include <smumps_c.h> 35 #define MUMPS_c smumps_c 36 #define MUMPS_STRUC_C SMUMPS_STRUC_C 37 #define MumpsScalar SMUMPS_REAL 38 #else 39 #include <dmumps_c.h> 40 #define MUMPS_c dmumps_c 41 #define MUMPS_STRUC_C DMUMPS_STRUC_C 42 #define MumpsScalar DMUMPS_REAL 43 #endif 44 #endif 45 #endif 46 EXTERN_C_END 47 48 #define JOB_INIT -1 49 #define JOB_NULL 0 50 #define JOB_FACTSYMBOLIC 1 51 #define JOB_FACTNUMERIC 2 52 #define JOB_SOLVE 3 53 #define JOB_END -2 54 55 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 56 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 57 naming convention in PetscMPIInt, PetscBLASInt etc. 58 */ 59 typedef MUMPS_INT PetscMUMPSInt; 60 61 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 62 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 63 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 64 #endif 65 #else 66 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 67 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 68 #endif 69 #endif 70 71 #define MPIU_MUMPSINT MPI_INT 72 #define PETSC_MUMPS_INT_MAX 2147483647 73 #define PETSC_MUMPS_INT_MIN -2147483648 74 75 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 76 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 77 { 78 PetscFunctionBegin; 79 #if PetscDefined(USE_64BIT_INDICES) 80 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 81 #endif 82 *b = (PetscMUMPSInt)a; 83 PetscFunctionReturn(PETSC_SUCCESS); 84 } 85 86 /* Put these utility routines here since they are only used in this file */ 87 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 88 { 89 PetscInt myval; 90 PetscBool myset; 91 92 PetscFunctionBegin; 93 /* PetscInt's size should be always >= PetscMUMPSInt's. 
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 94 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 95 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 96 if (set) *set = myset; 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 100 101 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C. 102 // 103 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type. 104 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size 105 // arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall(). 106 // 107 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface. 108 // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used. 109 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers. 110 typedef struct { 111 PetscPrecision precision; // precision used by MUMPS 112 void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex 113 114 // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions. 115 MUMPS_INT sym, par, job; 116 MUMPS_INT comm_fortran; /* Fortran communicator */ 117 MUMPS_INT *icntl; 118 void *cntl; // MumpsReal, fixed size array 119 MUMPS_INT n; 120 MUMPS_INT nblk; 121 122 /* Assembled entry */ 123 MUMPS_INT8 nnz; 124 MUMPS_INT *irn; 125 MUMPS_INT *jcn; 126 void *a; // MumpsScalar, centralized input 127 PetscCount a_len; 128 129 /* Distributed entry */ 130 MUMPS_INT8 nnz_loc; 131 MUMPS_INT *irn_loc; 132 MUMPS_INT *jcn_loc; 133 void *a_loc; // MumpsScalar, distributed input 134 PetscCount a_loc_len; 135 136 /* Matrix by blocks */ 137 MUMPS_INT *blkptr; 138 MUMPS_INT *blkvar; 139 140 /* Ordering, if given by user */ 141 MUMPS_INT *perm_in; 142 143 /* RHS, solution, output data and statistics */ 144 void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers 145 PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar 146 147 MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc; 148 MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc; 149 // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0) 150 MUMPS_INT schur_lld; 151 MUMPS_INT *info, *infog; // fixed size array 152 void *rinfo, *rinfog; // MumpsReal, fixed size array 153 154 /* Null space */ 155 MUMPS_INT *pivnul_list; // allocated by MUMPS! 156 MUMPS_INT *mapping; // allocated by MUMPS!
157 158 /* Schur */ 159 MUMPS_INT size_schur; 160 MUMPS_INT *listvar_schur; 161 void *schur; // MumpsScalar 162 PetscCount schur_len; 163 164 /* For out-of-core */ 165 char *ooc_tmpdir; // fixed size array 166 char *ooc_prefix; // fixed size array 167 } XMUMPS_STRUC_C; 168 169 // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct 170 #define AllocatInternalID(MUMPS_STRUC_T, outer) \ 171 do { \ 172 MUMPS_STRUC_T *inner; \ 173 PetscCall(PetscNew(&inner)); \ 174 outer->icntl = inner->icntl; \ 175 outer->cntl = inner->cntl; \ 176 outer->info = inner->info; \ 177 outer->infog = inner->infog; \ 178 outer->rinfo = inner->rinfo; \ 179 outer->rinfog = inner->rinfog; \ 180 outer->ooc_tmpdir = inner->ooc_tmpdir; \ 181 outer->ooc_prefix = inner->ooc_prefix; \ 182 /* the three field should never change after init */ \ 183 inner->comm_fortran = outer->comm_fortran; \ 184 inner->par = outer->par; \ 185 inner->sym = outer->sym; \ 186 outer->internal_id = inner; \ 187 } while (0) 188 189 // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner 190 static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision) 191 { 192 PetscFunctionBegin; 193 outer->precision = precision; 194 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 195 #if defined(PETSC_USE_COMPLEX) 196 if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer); 197 else AllocatInternalID(ZMUMPS_STRUC_C, outer); 198 #else 199 if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer); 200 else AllocatInternalID(DMUMPS_STRUC_C, outer); 201 #endif 202 #else 203 AllocatInternalID(MUMPS_STRUC_C, outer); 204 #endif 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \ 209 do { \ 210 MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \ 211 PetscCall(PetscFree(inner->a)); \ 212 PetscCall(PetscFree(inner->a_loc)); \ 213 PetscCall(PetscFree(inner->redrhs)); \ 214 PetscCall(PetscFree(inner->rhs)); \ 215 PetscCall(PetscFree(inner->rhs_sparse)); \ 216 PetscCall(PetscFree(inner->rhs_loc)); \ 217 PetscCall(PetscFree(inner->sol_loc)); \ 218 PetscCall(PetscFree(inner->schur)); \ 219 } while (0) 220 221 static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer) 222 { 223 PetscFunctionBegin; 224 if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer 225 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 226 const PetscPrecision mumps_precision = outer->precision; 227 if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision 228 #if defined(PETSC_USE_COMPLEX) 229 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer); 230 else FreeInternalIDFields(ZMUMPS_STRUC_C, outer); 231 #else 232 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer); 233 else FreeInternalIDFields(DMUMPS_STRUC_C, outer); 234 #endif 235 } 236 #endif 237 PetscCall(PetscFree(outer->internal_id)); 238 } 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>. 243 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array. 
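// For example, MatMumpsHandleSchur_Private() below stages the PetscScalar buffer mumps->redrhs as the MumpsScalar buffer id.redrhs via
//   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
// with <convert> = PETSC_FALSE since the reduced right-hand side is filled in later, so no value conversion is needed at allocation time.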
244 // (For brevity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>) 245 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request. 246 // 247 // The returned array is a companion, so how it is created depends on whether PetscScalar and MumpsScalar are the same. 248 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 249 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 250 // 251 // 252 // Input parameters: 253 // + convert - whether to do PetscScalar to MumpsScalar conversion 254 // . n - length of the PetscScalar array 255 // . pa - [n], points to the PetscScalar array 256 // . precision - precision of MumpsScalar 257 // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 258 // - ma - on input, an existing MumpsScalar array if any. 259 // 260 // Output parameters: 261 // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 262 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 263 // 264 // Note: 265 // New memory, if allocated, is done via PetscMalloc1(), and is owned by the caller. 266 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 267 { 268 PetscFunctionBegin; 269 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 270 const PetscPrecision mumps_precision = precision; 271 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). 
Must be single or double", (int)precision); 272 #if defined(PETSC_USE_COMPLEX) 273 if (mumps_precision != PETSC_SCALAR_PRECISION) { 274 if (mumps_precision == PETSC_PRECISION_SINGLE) { 275 if (*m < n) { 276 PetscCall(PetscFree(*ma)); 277 PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 278 *m = n; 279 } 280 if (convert) { 281 CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 282 for (PetscCount i = 0; i < n; i++) { 283 b[i].r = PetscRealPart(pa[i]); 284 b[i].i = PetscImaginaryPart(pa[i]); 285 } 286 } 287 } else { 288 if (*m < n) { 289 PetscCall(PetscFree(*ma)); 290 PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 291 *m = n; 292 } 293 if (convert) { 294 ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 295 for (PetscCount i = 0; i < n; i++) { 296 b[i].r = PetscRealPart(pa[i]); 297 b[i].i = PetscImaginaryPart(pa[i]); 298 } 299 } 300 } 301 } 302 #else 303 if (mumps_precision != PETSC_SCALAR_PRECISION) { 304 if (mumps_precision == PETSC_PRECISION_SINGLE) { 305 if (*m < n) { 306 PetscCall(PetscFree(*ma)); 307 PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 308 *m = n; 309 } 310 if (convert) { 311 SMUMPS_REAL *b = *(SMUMPS_REAL **)ma; 312 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 313 } 314 } else { 315 if (*m < n) { 316 PetscCall(PetscFree(*ma)); 317 PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 318 *m = n; 319 } 320 if (convert) { 321 DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 322 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 323 } 324 } 325 } 326 #endif 327 else 328 #endif 329 { 330 if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 331 *ma = (void *)pa; // same precision, make them alias 332 *m = 0; 333 } 334 PetscFunctionReturn(PETSC_SUCCESS); 335 } 336 337 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 338 // 339 // 1) If the two types are different, cast array elements. 340 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 341 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 342 { 343 PetscFunctionBegin; 344 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 345 if (mumps_precision != PETSC_SCALAR_PRECISION) { 346 #if defined(PETSC_USE_COMPLEX) 347 if (mumps_precision == PETSC_PRECISION_SINGLE) { 348 PetscReal *a = (PetscReal *)pa; 349 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 350 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 351 } else { 352 PetscReal *a = (PetscReal *)pa; 353 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 354 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 355 } 356 #else 357 if (mumps_precision == PETSC_PRECISION_SINGLE) { 358 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 360 } else { 361 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 362 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 363 } 364 #endif 365 } else 366 #endif 367 PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 368 PetscFunctionReturn(PETSC_SUCCESS); 369 } 370 371 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 372 // 373 // 1) If the two types are different, cast array elements. 374 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 
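// For example, MatMumpsSolveSchur_Private() below copies the PetscScalar Schur solution xarray[] back into the (possibly lower-precision) MumpsScalar buffer id.redrhs before MUMPS runs the expansion phase:
//   PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));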
375 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma) 376 { 377 PetscFunctionBegin; 378 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 379 if (mumps_precision != PETSC_SCALAR_PRECISION) { 380 #if defined(PETSC_USE_COMPLEX) 381 if (mumps_precision == PETSC_PRECISION_SINGLE) { 382 CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma; 383 for (PetscCount i = 0; i < n; i++) { 384 b[i].r = PetscRealPart(pa[i]); 385 b[i].i = PetscImaginaryPart(pa[i]); 386 } 387 } else { 388 ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma; 389 for (PetscCount i = 0; i < n; i++) { 390 b[i].r = PetscRealPart(pa[i]); 391 b[i].i = PetscImaginaryPart(pa[i]); 392 } 393 } 394 #else 395 if (mumps_precision == PETSC_PRECISION_SINGLE) { 396 SMUMPS_REAL *b = (SMUMPS_REAL *)ma; 397 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 398 } else { 399 DMUMPS_REAL *b = (DMUMPS_REAL *)ma; 400 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 401 } 402 #endif 403 } else 404 #endif 405 PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n)); 406 PetscFunctionReturn(PETSC_SUCCESS); 407 } 408 409 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id) 410 { 411 return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT; 412 } 413 414 #define PreMumpsCall(inner, outer, mumpsscalar) \ 415 do { \ 416 inner->job = outer->job; \ 417 inner->n = outer->n; \ 418 inner->nblk = outer->nblk; \ 419 inner->nnz = outer->nnz; \ 420 inner->irn = outer->irn; \ 421 inner->jcn = outer->jcn; \ 422 inner->a = (mumpsscalar *)outer->a; \ 423 inner->nnz_loc = outer->nnz_loc; \ 424 inner->irn_loc = outer->irn_loc; \ 425 inner->jcn_loc = outer->jcn_loc; \ 426 inner->a_loc = (mumpsscalar *)outer->a_loc; \ 427 inner->blkptr = outer->blkptr; \ 428 inner->blkvar = outer->blkvar; \ 429 inner->perm_in = outer->perm_in; \ 430 inner->rhs = (mumpsscalar *)outer->rhs; \ 431 inner->redrhs = (mumpsscalar *)outer->redrhs; \ 432 inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \ 433 inner->sol_loc = (mumpsscalar *)outer->sol_loc; \ 434 inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \ 435 inner->irhs_sparse = outer->irhs_sparse; \ 436 inner->irhs_ptr = outer->irhs_ptr; \ 437 inner->isol_loc = outer->isol_loc; \ 438 inner->irhs_loc = outer->irhs_loc; \ 439 inner->nrhs = outer->nrhs; \ 440 inner->lrhs = outer->lrhs; \ 441 inner->lredrhs = outer->lredrhs; \ 442 inner->nz_rhs = outer->nz_rhs; \ 443 inner->lsol_loc = outer->lsol_loc; \ 444 inner->nloc_rhs = outer->nloc_rhs; \ 445 inner->lrhs_loc = outer->lrhs_loc; \ 446 inner->schur_lld = outer->schur_lld; \ 447 inner->size_schur = outer->size_schur; \ 448 inner->listvar_schur = outer->listvar_schur; \ 449 inner->schur = (mumpsscalar *)outer->schur; \ 450 } while (0) 451 452 #define PostMumpsCall(inner, outer) \ 453 do { \ 454 outer->pivnul_list = inner->pivnul_list; \ 455 outer->mapping = inner->mapping; \ 456 } while (0) 457 458 // Entry for PETSc to call mumps 459 static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer) 460 { 461 PetscFunctionBegin; 462 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 463 #if defined(PETSC_USE_COMPLEX) 464 if (outer->precision == PETSC_PRECISION_SINGLE) { 465 CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id; 466 PreMumpsCall(inner, outer, CMUMPS_COMPLEX); 467 PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner)); 468 PostMumpsCall(inner, outer); 469 } else { 470 ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id; 471 PreMumpsCall(inner, outer, 
ZMUMPS_COMPLEX); 472 PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner)); 473 PostMumpsCall(inner, outer); 474 } 475 #else 476 if (outer->precision == PETSC_PRECISION_SINGLE) { 477 SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id; 478 PreMumpsCall(inner, outer, SMUMPS_REAL); 479 PetscStackCallExternalVoid("smumps_c", smumps_c(inner)); 480 PostMumpsCall(inner, outer); 481 } else { 482 DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id; 483 PreMumpsCall(inner, outer, DMUMPS_REAL); 484 PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner)); 485 PostMumpsCall(inner, outer); 486 } 487 #endif 488 #else 489 MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id; 490 PreMumpsCall(inner, outer, MumpsScalar); 491 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner)); 492 PostMumpsCall(inner, outer); 493 #endif 494 PetscFunctionReturn(PETSC_SUCCESS); 495 } 496 497 /* macros s.t. indices match MUMPS documentation */ 498 #define ICNTL(I) icntl[(I) - 1] 499 #define INFOG(I) infog[(I) - 1] 500 #define INFO(I) info[(I) - 1] 501 502 // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I! 503 #if defined(PETSC_USE_COMPLEX) 504 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i) 505 #else 506 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I]) 507 #endif 508 509 // Get a value from MumpsReal arrays. The value is convertible to PetscReal. 510 #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1]) 511 #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1]) 512 #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1]) 513 514 // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL> 515 #define ID_CNTL_SET(ID, I, VAL) \ 516 do { \ 517 if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \ 518 else ((double *)(ID).cntl)[(I) - 1] = (VAL); \ 519 } while (0) 520 521 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 522 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 523 #define PetscMUMPS_c(mumps) \ 524 do { \ 525 if (mumps->use_petsc_omp_support) { \ 526 if (mumps->is_omp_master) { \ 527 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 528 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 529 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 530 PetscCall(PetscFPTrapPop()); \ 531 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 532 } \ 533 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 534 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 535 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 536 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 537 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. 
\ 538 */ \ 539 SMUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \ 540 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 541 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 542 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 543 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 544 } else { \ 545 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 546 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 547 PetscCall(PetscFPTrapPop()); \ 548 } \ 549 } while (0) 550 #else 551 #define PetscMUMPS_c(mumps) \ 552 do { \ 553 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 554 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 555 PetscCall(PetscFPTrapPop()); \ 556 } while (0) 557 #endif 558 559 typedef struct Mat_MUMPS Mat_MUMPS; 560 struct Mat_MUMPS { 561 XMUMPS_STRUC_C id; 562 563 MatStructure matstruc; 564 PetscMPIInt myid, petsc_size; 565 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 566 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 567 PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 568 PetscMUMPSInt sym; 569 MPI_Comm mumps_comm; 570 PetscMUMPSInt *ICNTL_pre; 571 PetscReal *CNTL_pre; 572 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 573 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 574 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 575 PetscMUMPSInt ICNTL26; 576 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 577 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 578 PetscInt *rhs_nrow, max_nrhs; 579 PetscMPIInt *rhs_recvcounts, *rhs_disps; 580 PetscScalar *rhs_loc, *rhs_recvbuf; 581 #endif 582 Vec b_seq, x_seq; 583 PetscInt ninfo, *info; /* which INFO to display */ 584 PetscInt sizeredrhs; 585 PetscScalar *schur_sol; 586 PetscInt schur_sizesol; 587 PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision 588 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 589 PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 590 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 591 592 /* Support for MATNEST */ 593 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 594 PetscCount *nest_vals_start; 595 PetscScalar *nest_vals; 596 597 /* stuff used by petsc/mumps OpenMP support*/ 598 PetscBool use_petsc_omp_support; 599 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 600 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */ 601 PetscCount *recvcount; /* a collection of nnz on omp_master */ 602 PetscMPIInt tag, omp_comm_size; 603 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 604 MPI_Request *reqs; 605 }; 606 607 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 608 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 
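   For example, the 1-based CSR form of a dense 2x2 matrix is ia[] = {1, 3, 5} and ja[] = {1, 2, 1, 2}, so its number of nonzeros is ia[nrow] - 1 = 4.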
609 */ 610 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 611 { 612 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 613 614 PetscFunctionBegin; 615 #if defined(PETSC_USE_64BIT_INDICES) 616 { 617 PetscInt i; 618 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 619 PetscCall(PetscFree(mumps->ia_alloc)); 620 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 621 mumps->cur_ilen = nrow + 1; 622 } 623 if (nnz > mumps->cur_jlen) { 624 PetscCall(PetscFree(mumps->ja_alloc)); 625 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 626 mumps->cur_jlen = nnz; 627 } 628 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 629 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 630 *ia_mumps = mumps->ia_alloc; 631 *ja_mumps = mumps->ja_alloc; 632 } 633 #else 634 *ia_mumps = ia; 635 *ja_mumps = ja; 636 #endif 637 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 642 { 643 PetscFunctionBegin; 644 PetscCall(PetscFree(mumps->id.listvar_schur)); 645 PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID() 646 PetscCall(PetscFree(mumps->schur_sol)); 647 mumps->id.size_schur = 0; 648 mumps->id.schur_lld = 0; 649 if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built 650 PetscFunctionReturn(PETSC_SUCCESS); 651 } 652 653 /* solve with rhs in mumps->id.redrhs and return in the same location */ 654 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 655 { 656 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 657 Mat S, B, X; // solve S*X = B; all three matrices are dense 658 MatFactorSchurStatus schurstatus; 659 PetscInt sizesol; 660 const PetscScalar *xarray; 661 662 PetscFunctionBegin; 663 PetscCall(MatFactorFactorizeSchurComplement(F)); 664 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 665 PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); 666 667 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B)); 668 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 669 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 670 PetscCall(MatBindToCPU(B, S->boundtocpu)); 671 #endif 672 switch (schurstatus) { 673 case MAT_FACTOR_SCHUR_FACTORED: 674 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X)); 675 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 676 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 677 PetscCall(MatBindToCPU(X, S->boundtocpu)); 678 #endif 679 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 680 PetscCall(MatMatSolveTranspose(S, B, X)); 681 } else { 682 PetscCall(MatMatSolve(S, B, X)); 683 } 684 break; 685 case MAT_FACTOR_SCHUR_INVERTED: 686 sizesol = mumps->id.nrhs * mumps->id.size_schur; 687 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 688 PetscCall(PetscFree(mumps->schur_sol)); 689 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 690 mumps->schur_sizesol = sizesol; 691 } 692 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, 
mumps->id.nrhs, mumps->schur_sol, &X)); 693 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 694 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 695 PetscCall(MatBindToCPU(X, S->boundtocpu)); 696 #endif 697 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 698 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 699 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 700 } else { 701 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 702 } 703 PetscCall(MatProductSetFromOptions(X)); 704 PetscCall(MatProductSymbolic(X)); 705 PetscCall(MatProductNumeric(X)); 706 707 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 708 break; 709 default: 710 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 711 } 712 // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device. 713 PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs. 714 PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs)); 715 PetscCall(MatDenseRestoreArrayRead(X, &xarray)); 716 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 717 PetscCall(MatDestroy(&B)); 718 PetscCall(MatDestroy(&X)); 719 PetscFunctionReturn(PETSC_SUCCESS); 720 } 721 722 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 723 { 724 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 725 726 PetscFunctionBegin; 727 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 728 PetscFunctionReturn(PETSC_SUCCESS); 729 } 730 if (!expansion) { /* prepare for the condensation step */ 731 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 732 /* allocate MUMPS internal array to store reduced right-hand sides */ 733 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 734 mumps->id.lredrhs = mumps->id.size_schur; 735 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 736 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 737 PetscCall(PetscFree(mumps->redrhs)); 738 PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs)); 739 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs)); 740 } 741 } else { /* prepare for the expansion step */ 742 PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */ 743 mumps->id.ICNTL(26) = 2; /* expansion phase */ 744 PetscMUMPS_c(mumps); 745 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 746 /* restore defaults */ 747 mumps->id.ICNTL(26) = -1; 748 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 749 if (mumps->id.nrhs > 1) { 750 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 751 PetscCall(PetscFree(mumps->redrhs)); 752 mumps->id.redrhs_len = 0; 753 mumps->id.lredrhs = 0; 754 mumps->sizeredrhs = 0; 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 /* 761 MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz] 762 763 input: 764 A - matrix in aij,baij or sbaij format 765 shift - 0: C style output triple; 1: Fortran style output triple. 
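              (e.g., with shift = 1, the 2x2 identity matrix is returned as r = {1, 2}, c = {1, 2}, v = {1.0, 1.0})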
766 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 767 MAT_REUSE_MATRIX: only the values in v array are updated 768 output: 769 nnz - dim of r, c, and v (number of local nonzero entries of A) 770 r, c, v - row and col index, matrix values (matrix triples) 771 772 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 773 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 774 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 775 776 */ 777 778 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 779 { 780 const PetscScalar *av; 781 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 782 PetscCount nz, rnz, k; 783 PetscMUMPSInt *row, *col; 784 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 785 786 PetscFunctionBegin; 787 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 788 if (reuse == MAT_INITIAL_MATRIX) { 789 nz = aa->nz; 790 ai = aa->i; 791 aj = aa->j; 792 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 793 for (PetscCount i = k = 0; i < M; i++) { 794 rnz = ai[i + 1] - ai[i]; 795 ajj = aj + ai[i]; 796 for (PetscCount j = 0; j < rnz; j++) { 797 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 798 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 799 k++; 800 } 801 } 802 mumps->val = (PetscScalar *)av; 803 mumps->irn = row; 804 mumps->jcn = col; 805 mumps->nnz = nz; 806 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 807 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 808 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 809 PetscFunctionReturn(PETSC_SUCCESS); 810 } 811 812 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 813 { 814 PetscCount nz, i, j, k, r; 815 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 816 PetscMUMPSInt *row, *col; 817 818 PetscFunctionBegin; 819 nz = a->sliidx[a->totalslices]; 820 if (reuse == MAT_INITIAL_MATRIX) { 821 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 822 for (i = k = 0; i < a->totalslices; i++) { 823 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 824 } 825 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 826 mumps->irn = row; 827 mumps->jcn = col; 828 mumps->nnz = nz; 829 mumps->val = a->val; 830 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 831 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 832 PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 836 { 837 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 838 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 839 PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 840 PetscInt bs; 841 PetscMUMPSInt *row, *col; 842 843 PetscFunctionBegin; 844 if (reuse == MAT_INITIAL_MATRIX) { 845 
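    /* first assembly: expand every bs x bs block into bs*bs (row, col) index pairs, generated in the same order in which the block entries are stored in aa->a, so the BAIJ value array can later be handed to MUMPS unchanged */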
PetscCall(MatGetBlockSize(A, &bs)); 846 M = A->rmap->N / bs; 847 ai = aa->i; 848 aj = aa->j; 849 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 850 for (i = 0; i < M; i++) { 851 ajj = aj + ai[i]; 852 rnz = ai[i + 1] - ai[i]; 853 for (k = 0; k < rnz; k++) { 854 for (j = 0; j < bs; j++) { 855 for (m = 0; m < bs; m++) { 856 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 857 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 858 idx++; 859 } 860 } 861 } 862 } 863 mumps->irn = row; 864 mumps->jcn = col; 865 mumps->nnz = nz; 866 mumps->val = aa->a; 867 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 868 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 869 PetscFunctionReturn(PETSC_SUCCESS); 870 } 871 872 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 873 { 874 const PetscInt *ai, *aj, *ajj; 875 PetscInt bs; 876 PetscCount nz, rnz, i, j, k, m; 877 PetscMUMPSInt *row, *col; 878 PetscScalar *val; 879 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 880 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 881 #if defined(PETSC_USE_COMPLEX) 882 PetscBool isset, hermitian; 883 #endif 884 885 PetscFunctionBegin; 886 #if defined(PETSC_USE_COMPLEX) 887 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 888 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 889 #endif 890 ai = aa->i; 891 aj = aa->j; 892 PetscCall(MatGetBlockSize(A, &bs)); 893 if (reuse == MAT_INITIAL_MATRIX) { 894 const PetscCount alloc_size = aa->nz * bs2; 895 896 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 897 if (bs > 1) { 898 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 899 mumps->val = mumps->val_alloc; 900 } else { 901 mumps->val = aa->a; 902 } 903 mumps->irn = row; 904 mumps->jcn = col; 905 } else { 906 row = mumps->irn; 907 col = mumps->jcn; 908 } 909 val = mumps->val; 910 911 nz = 0; 912 if (bs > 1) { 913 for (i = 0; i < mbs; i++) { 914 rnz = ai[i + 1] - ai[i]; 915 ajj = aj + ai[i]; 916 for (j = 0; j < rnz; j++) { 917 for (k = 0; k < bs; k++) { 918 for (m = 0; m < bs; m++) { 919 if (ajj[j] > i || k >= m) { 920 if (reuse == MAT_INITIAL_MATRIX) { 921 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 922 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 923 } 924 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 925 } 926 } 927 } 928 } 929 } 930 } else if (reuse == MAT_INITIAL_MATRIX) { 931 for (i = 0; i < mbs; i++) { 932 rnz = ai[i + 1] - ai[i]; 933 ajj = aj + ai[i]; 934 for (j = 0; j < rnz; j++) { 935 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 936 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 937 nz++; 938 } 939 } 940 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 941 } else if (mumps->nest_vals) 942 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 943 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the 
mumps->val pointer */ 944 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 945 PetscFunctionReturn(PETSC_SUCCESS); 946 } 947 948 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 949 { 950 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 951 PetscCount nz, rnz, i, j; 952 const PetscScalar *av, *v1; 953 PetscScalar *val; 954 PetscMUMPSInt *row, *col; 955 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 956 PetscBool missing; 957 #if defined(PETSC_USE_COMPLEX) 958 PetscBool hermitian, isset; 959 #endif 960 961 PetscFunctionBegin; 962 #if defined(PETSC_USE_COMPLEX) 963 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 964 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 965 #endif 966 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 967 ai = aa->i; 968 aj = aa->j; 969 adiag = aa->diag; 970 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 971 if (reuse == MAT_INITIAL_MATRIX) { 972 /* count nz in the upper triangular part of A */ 973 nz = 0; 974 if (missing) { 975 for (i = 0; i < M; i++) { 976 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 977 for (j = ai[i]; j < ai[i + 1]; j++) { 978 if (aj[j] < i) continue; 979 nz++; 980 } 981 } else { 982 nz += ai[i + 1] - adiag[i]; 983 } 984 } 985 } else { 986 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 987 } 988 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 989 PetscCall(PetscMalloc1(nz, &val)); 990 mumps->nnz = nz; 991 mumps->irn = row; 992 mumps->jcn = col; 993 mumps->val = mumps->val_alloc = val; 994 995 nz = 0; 996 if (missing) { 997 for (i = 0; i < M; i++) { 998 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 999 for (j = ai[i]; j < ai[i + 1]; j++) { 1000 if (aj[j] < i) continue; 1001 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1002 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 1003 val[nz] = av[j]; 1004 nz++; 1005 } 1006 } else { 1007 rnz = ai[i + 1] - adiag[i]; 1008 ajj = aj + adiag[i]; 1009 v1 = av + adiag[i]; 1010 for (j = 0; j < rnz; j++) { 1011 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1012 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1013 val[nz++] = v1[j]; 1014 } 1015 } 1016 } 1017 } else { 1018 for (i = 0; i < M; i++) { 1019 rnz = ai[i + 1] - adiag[i]; 1020 ajj = aj + adiag[i]; 1021 v1 = av + adiag[i]; 1022 for (j = 0; j < rnz; j++) { 1023 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1024 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1025 val[nz++] = v1[j]; 1026 } 1027 } 1028 } 1029 } else { 1030 nz = 0; 1031 val = mumps->val; 1032 if (missing) { 1033 for (i = 0; i < M; i++) { 1034 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1035 for (j = ai[i]; j < ai[i + 1]; j++) { 1036 if (aj[j] < i) continue; 1037 val[nz++] = av[j]; 1038 } 1039 } else { 1040 rnz = ai[i + 1] - adiag[i]; 1041 v1 = av + adiag[i]; 1042 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1043 } 1044 } 1045 } else { 1046 for (i = 0; i < M; i++) { 1047 rnz = ai[i + 1] - adiag[i]; 1048 v1 = av + adiag[i]; 1049 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1050 } 1051 } 1052 } 1053 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1058 { 1059 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 1060 PetscInt bs; 1061 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1062 PetscMUMPSInt 
*row, *col; 1063 const PetscScalar *av, *bv, *v1, *v2; 1064 PetscScalar *val; 1065 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1066 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1067 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1068 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 1069 #if defined(PETSC_USE_COMPLEX) 1070 PetscBool hermitian, isset; 1071 #endif 1072 1073 PetscFunctionBegin; 1074 #if defined(PETSC_USE_COMPLEX) 1075 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1076 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1077 #endif 1078 PetscCall(MatGetBlockSize(A, &bs)); 1079 rstart = A->rmap->rstart; 1080 ai = aa->i; 1081 aj = aa->j; 1082 bi = bb->i; 1083 bj = bb->j; 1084 av = aa->a; 1085 bv = bb->a; 1086 1087 garray = mat->garray; 1088 1089 if (reuse == MAT_INITIAL_MATRIX) { 1090 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 1091 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1092 PetscCall(PetscMalloc1(nz, &val)); 1093 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1094 mumps->irn = row; 1095 mumps->jcn = col; 1096 mumps->val = mumps->val_alloc = val; 1097 } else { 1098 val = mumps->val; 1099 } 1100 1101 jj = 0; 1102 irow = rstart; 1103 for (i = 0; i < mbs; i++) { 1104 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1105 countA = ai[i + 1] - ai[i]; 1106 countB = bi[i + 1] - bi[i]; 1107 bjj = bj + bi[i]; 1108 v1 = av + ai[i] * bs2; 1109 v2 = bv + bi[i] * bs2; 1110 1111 if (bs > 1) { 1112 /* A-part */ 1113 for (j = 0; j < countA; j++) { 1114 for (k = 0; k < bs; k++) { 1115 for (m = 0; m < bs; m++) { 1116 if (rstart + ajj[j] * bs > irow || k >= m) { 1117 if (reuse == MAT_INITIAL_MATRIX) { 1118 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1119 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 1120 } 1121 val[jj++] = v1[j * bs2 + m + k * bs]; 1122 } 1123 } 1124 } 1125 } 1126 1127 /* B-part */ 1128 for (j = 0; j < countB; j++) { 1129 for (k = 0; k < bs; k++) { 1130 for (m = 0; m < bs; m++) { 1131 if (reuse == MAT_INITIAL_MATRIX) { 1132 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1133 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 1134 } 1135 val[jj++] = v2[j * bs2 + m + k * bs]; 1136 } 1137 } 1138 } 1139 } else { 1140 /* A-part */ 1141 for (j = 0; j < countA; j++) { 1142 if (reuse == MAT_INITIAL_MATRIX) { 1143 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1144 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1145 } 1146 val[jj++] = v1[j]; 1147 } 1148 1149 /* B-part */ 1150 for (j = 0; j < countB; j++) { 1151 if (reuse == MAT_INITIAL_MATRIX) { 1152 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1153 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1154 } 1155 val[jj++] = v2[j]; 1156 } 1157 } 1158 irow += bs; 1159 } 1160 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 1161 PetscFunctionReturn(PETSC_SUCCESS); 1162 } 1163 1164 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1165 { 1166 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1167 PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 1168 PetscMUMPSInt *row, *col; 1169 const PetscScalar *av, *bv, *v1, *v2; 1170 PetscScalar *val; 1171 Mat Ad, Ao; 1172 Mat_SeqAIJ *aa; 1173 Mat_SeqAIJ *bb; 1174 1175 PetscFunctionBegin; 1176 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, 
&Ao, &garray)); 1177 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1178 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1179 1180 aa = (Mat_SeqAIJ *)Ad->data; 1181 bb = (Mat_SeqAIJ *)Ao->data; 1182 ai = aa->i; 1183 aj = aa->j; 1184 bi = bb->i; 1185 bj = bb->j; 1186 1187 rstart = A->rmap->rstart; 1188 cstart = A->cmap->rstart; 1189 1190 if (reuse == MAT_INITIAL_MATRIX) { 1191 nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 1192 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1193 PetscCall(PetscMalloc1(nz, &val)); 1194 mumps->nnz = nz; 1195 mumps->irn = row; 1196 mumps->jcn = col; 1197 mumps->val = mumps->val_alloc = val; 1198 } else { 1199 val = mumps->val; 1200 } 1201 1202 jj = 0; 1203 irow = rstart; 1204 for (i = 0; i < m; i++) { 1205 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1206 countA = ai[i + 1] - ai[i]; 1207 countB = bi[i + 1] - bi[i]; 1208 bjj = bj + bi[i]; 1209 v1 = av + ai[i]; 1210 v2 = bv + bi[i]; 1211 1212 /* A-part */ 1213 for (j = 0; j < countA; j++) { 1214 if (reuse == MAT_INITIAL_MATRIX) { 1215 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1216 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 1217 } 1218 val[jj++] = v1[j]; 1219 } 1220 1221 /* B-part */ 1222 for (j = 0; j < countB; j++) { 1223 if (reuse == MAT_INITIAL_MATRIX) { 1224 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1225 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1226 } 1227 val[jj++] = v2[j]; 1228 } 1229 irow++; 1230 } 1231 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1232 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1233 PetscFunctionReturn(PETSC_SUCCESS); 1234 } 1235 1236 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1237 { 1238 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 1239 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 1240 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1241 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 1242 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 1243 const PetscInt bs2 = mat->bs2; 1244 PetscInt bs; 1245 PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx; 1246 PetscMUMPSInt *row, *col; 1247 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 1248 PetscScalar *val; 1249 1250 PetscFunctionBegin; 1251 PetscCall(MatGetBlockSize(A, &bs)); 1252 if (reuse == MAT_INITIAL_MATRIX) { 1253 nz = bs2 * (aa->nz + bb->nz); 1254 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1255 PetscCall(PetscMalloc1(nz, &val)); 1256 mumps->nnz = nz; 1257 mumps->irn = row; 1258 mumps->jcn = col; 1259 mumps->val = mumps->val_alloc = val; 1260 } else { 1261 val = mumps->val; 1262 } 1263 1264 jj = 0; 1265 irow = rstart; 1266 for (i = 0; i < mbs; i++) { 1267 countA = ai[i + 1] - ai[i]; 1268 countB = bi[i + 1] - bi[i]; 1269 ajj = aj + ai[i]; 1270 bjj = bj + bi[i]; 1271 v1 = av + bs2 * ai[i]; 1272 v2 = bv + bs2 * bi[i]; 1273 1274 idx = 0; 1275 /* A-part */ 1276 for (k = 0; k < countA; k++) { 1277 for (j = 0; j < bs; j++) { 1278 for (n = 0; n < bs; n++) { 1279 if (reuse == MAT_INITIAL_MATRIX) { 1280 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1281 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 1282 } 1283 val[jj++] = v1[idx++]; 1284 } 1285 } 1286 } 1287 1288 idx = 0; 1289 /* B-part */ 1290 for (k = 0; k < countB; k++) { 1291 for (j = 0; j < bs; j++) { 1292 for (n = 0; n < bs; n++) { 1293 if (reuse == MAT_INITIAL_MATRIX) { 
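    /* first assembly only: compute global indices for this entry of the off-diagonal (B) part; garray[] maps the local block column bjj[k] back to its global block column */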
1294 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1295 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 1296 } 1297 val[jj++] = v2[idx++]; 1298 } 1299 } 1300 } 1301 irow += bs; 1302 } 1303 PetscFunctionReturn(PETSC_SUCCESS); 1304 } 1305 1306 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1307 { 1308 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1309 PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 1310 PetscMUMPSInt *row, *col; 1311 const PetscScalar *av, *bv, *v1, *v2; 1312 PetscScalar *val; 1313 Mat Ad, Ao; 1314 Mat_SeqAIJ *aa; 1315 Mat_SeqAIJ *bb; 1316 #if defined(PETSC_USE_COMPLEX) 1317 PetscBool hermitian, isset; 1318 #endif 1319 1320 PetscFunctionBegin; 1321 #if defined(PETSC_USE_COMPLEX) 1322 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1323 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1324 #endif 1325 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1326 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1327 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1328 1329 aa = (Mat_SeqAIJ *)Ad->data; 1330 bb = (Mat_SeqAIJ *)Ao->data; 1331 ai = aa->i; 1332 aj = aa->j; 1333 adiag = aa->diag; 1334 bi = bb->i; 1335 bj = bb->j; 1336 1337 rstart = A->rmap->rstart; 1338 1339 if (reuse == MAT_INITIAL_MATRIX) { 1340 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1341 nzb = 0; /* num of upper triangular entries in mat->B */ 1342 for (i = 0; i < m; i++) { 1343 nza += (ai[i + 1] - adiag[i]); 1344 countB = bi[i + 1] - bi[i]; 1345 bjj = bj + bi[i]; 1346 for (j = 0; j < countB; j++) { 1347 if (garray[bjj[j]] > rstart) nzb++; 1348 } 1349 } 1350 1351 nz = nza + nzb; /* total nz of upper triangular part of mat */ 1352 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1353 PetscCall(PetscMalloc1(nz, &val)); 1354 mumps->nnz = nz; 1355 mumps->irn = row; 1356 mumps->jcn = col; 1357 mumps->val = mumps->val_alloc = val; 1358 } else { 1359 val = mumps->val; 1360 } 1361 1362 jj = 0; 1363 irow = rstart; 1364 for (i = 0; i < m; i++) { 1365 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 1366 v1 = av + adiag[i]; 1367 countA = ai[i + 1] - adiag[i]; 1368 countB = bi[i + 1] - bi[i]; 1369 bjj = bj + bi[i]; 1370 v2 = bv + bi[i]; 1371 1372 /* A-part */ 1373 for (j = 0; j < countA; j++) { 1374 if (reuse == MAT_INITIAL_MATRIX) { 1375 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1376 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1377 } 1378 val[jj++] = v1[j]; 1379 } 1380 1381 /* B-part */ 1382 for (j = 0; j < countB; j++) { 1383 if (garray[bjj[j]] > rstart) { 1384 if (reuse == MAT_INITIAL_MATRIX) { 1385 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1386 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1387 } 1388 val[jj++] = v2[j]; 1389 } 1390 } 1391 irow++; 1392 } 1393 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1394 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1395 PetscFunctionReturn(PETSC_SUCCESS); 1396 } 1397 1398 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1399 { 1400 const PetscScalar *av; 1401 const PetscInt M = A->rmap->n; 1402 PetscCount i; 1403 PetscMUMPSInt *row, *col; 1404 Vec v; 1405 1406 PetscFunctionBegin; 1407 PetscCall(MatDiagonalGetDiagonal(A, &v)); 1408 
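  /* each local row contributes one diagonal triple: row = col = i + A->rmap->rstart (note the <shift> argument is unused here), val = the matching entry of the diagonal vector */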
PetscCall(VecGetArrayRead(v, &av)); 1409 if (reuse == MAT_INITIAL_MATRIX) { 1410 PetscCall(PetscMalloc2(M, &row, M, &col)); 1411 for (i = 0; i < M; i++) { 1412 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1413 col[i] = row[i]; 1414 } 1415 mumps->val = (PetscScalar *)av; 1416 mumps->irn = row; 1417 mumps->jcn = col; 1418 mumps->nnz = M; 1419 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1420 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1421 PetscCall(VecRestoreArrayRead(v, &av)); 1422 PetscFunctionReturn(PETSC_SUCCESS); 1423 } 1424 1425 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1426 { 1427 PetscScalar *v; 1428 const PetscInt m = A->rmap->n, N = A->cmap->N; 1429 PetscInt lda; 1430 PetscCount i, j; 1431 PetscMUMPSInt *row, *col; 1432 1433 PetscFunctionBegin; 1434 PetscCall(MatDenseGetArray(A, &v)); 1435 PetscCall(MatDenseGetLDA(A, &lda)); 1436 if (reuse == MAT_INITIAL_MATRIX) { 1437 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1438 for (i = 0; i < m; i++) { 1439 col[i] = 0; 1440 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1441 } 1442 for (j = 1; j < N; j++) { 1443 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1444 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1445 } 1446 if (lda == m) mumps->val = v; 1447 else { 1448 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1449 mumps->val_alloc = mumps->val; 1450 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1451 } 1452 mumps->irn = row; 1453 mumps->jcn = col; 1454 mumps->nnz = m * N; 1455 } else { 1456 if (lda == m && !mumps->nest_vals) mumps->val = v; 1457 else { 1458 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1459 } 1460 } 1461 PetscCall(MatDenseRestoreArray(A, &v)); 1462 PetscFunctionReturn(PETSC_SUCCESS); 1463 } 1464 1465 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 1466 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 1467 // and its rows and columns permuted 1468 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 1469 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 1470 { 1471 Mat A; 1472 PetscScalar s[2]; 1473 PetscBool isTrans, isHTrans, compare; 1474 1475 PetscFunctionBegin; 1476 do { 1477 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1478 if (isTrans) { 1479 PetscCall(MatTransposeGetMat(*sub, &A)); 1480 isHTrans = PETSC_FALSE; 1481 } else { 1482 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1483 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 1484 } 1485 compare = (PetscBool)(isTrans || isHTrans); 1486 if (compare) { 1487 if (vshift && vscale) { 1488 PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec 
*)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1489 if (!*conjugate) { 1490 *vshift += s[0] * *vscale; 1491 *vscale *= s[1]; 1492 } else { 1493 *vshift += PetscConj(s[0]) * *vscale; 1494 *vscale *= PetscConj(s[1]); 1495 } 1496 } 1497 if (swap) *swap = (PetscBool)!*swap; 1498 if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 1499 *sub = A; 1500 } 1501 } while (compare); 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1506 { 1507 Mat **mats; 1508 PetscInt nr, nc; 1509 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1510 1511 PetscFunctionBegin; 1512 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1513 if (reuse == MAT_INITIAL_MATRIX) { 1514 PetscMUMPSInt *irns, *jcns; 1515 PetscScalar *vals; 1516 PetscCount totnnz, cumnnz, maxnnz; 1517 PetscInt *pjcns_w, Mbs = 0; 1518 IS *rows, *cols; 1519 PetscInt **rows_idx, **cols_idx; 1520 1521 cumnnz = 0; 1522 maxnnz = 0; 1523 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1524 for (PetscInt r = 0; r < nr; r++) { 1525 for (PetscInt c = 0; c < nc; c++) { 1526 Mat sub = mats[r][c]; 1527 1528 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1529 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1530 if (sub) { 1531 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1532 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 1533 MatInfo info; 1534 1535 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 1536 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1537 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1538 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1539 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1540 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1541 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1542 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1543 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1544 1545 if (chol) { 1546 if (r == c) { 1547 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1548 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1549 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1550 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1551 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1552 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1553 } else { 1554 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1555 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1556 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1557 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1558 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1559 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1560 } 1561 } else { 1562 if (isSeqAIJ) convert_to_triples = 
MatConvertToTriples_seqaij_seqaij; 1563 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1564 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1565 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1566 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1567 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1568 } 1569 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1570 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1571 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1572 cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 1573 maxnnz = PetscMax(maxnnz, info.nz_used); 1574 } 1575 } 1576 } 1577 1578 /* Allocate total COO */ 1579 totnnz = cumnnz; 1580 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1581 PetscCall(PetscMalloc1(totnnz, &vals)); 1582 1583 /* Handle rows and column maps 1584 We directly map rows and use an SF for the columns */ 1585 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1586 PetscCall(MatNestGetISs(A, rows, cols)); 1587 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1588 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1589 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1590 else (void)maxnnz; 1591 1592 cumnnz = 0; 1593 for (PetscInt r = 0; r < nr; r++) { 1594 for (PetscInt c = 0; c < nc; c++) { 1595 Mat sub = mats[r][c]; 1596 const PetscInt *ridx = rows_idx[r]; 1597 const PetscInt *cidx = cols_idx[c]; 1598 PetscScalar vscale = 1.0, vshift = 0.0; 1599 PetscInt rst, size, bs; 1600 PetscSF csf; 1601 PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 1602 PetscLayout cmap; 1603 PetscInt innz; 1604 1605 mumps->nest_vals_start[r * nc + c] = cumnnz; 1606 if (c == r) { 1607 PetscCall(ISGetSize(rows[r], &size)); 1608 if (!mumps->nest_convert_to_triples[r * nc + c]) { 1609 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1610 } 1611 PetscCall(MatGetBlockSize(sub, &bs)); 1612 Mbs += size / bs; 1613 } 1614 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1615 1616 /* Extract inner blocks if needed */ 1617 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 1618 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1619 1620 /* Get column layout to map off-process columns */ 1621 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1622 1623 /* Get row start to map on-process rows */ 1624 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1625 1626 /* Directly use the mumps datastructure and use C ordering for now */ 1627 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1628 1629 /* Swap the role of rows and columns indices for transposed blocks 1630 since we need values with global final ordering */ 1631 if (swap) { 1632 cidx = rows_idx[r]; 1633 ridx = cols_idx[c]; 1634 } 1635 1636 /* Communicate column indices 1637 This could have been done with a single SF but it would have complicated the code a lot. 
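(In effect, each per-block SF maps the block's local column numbering to the global column numbering of the nest matrix through the cols_idx[] indices obtained above.)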
1638 But since we do it only once, we pay the price of setting up an SF for each block */ 1639 if (PetscDefined(USE_64BIT_INDICES)) { 1640 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1641 } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 1642 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1643 PetscCall(PetscIntCast(mumps->nnz, &innz)); 1644 PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1645 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1646 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1647 PetscCall(PetscSFDestroy(&csf)); 1648 1649 /* Import indices: use direct map for rows and mapped indices for columns */ 1650 if (swap) { 1651 for (PetscInt k = 0; k < mumps->nnz; k++) { 1652 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1653 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1654 } 1655 } else { 1656 for (PetscInt k = 0; k < mumps->nnz; k++) { 1657 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1658 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1659 } 1660 } 1661 1662 /* Import values to full COO */ 1663 if (conjugate) { /* conjugate the entries */ 1664 PetscScalar *v = vals + cumnnz; 1665 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 1666 } else if (vscale != 1.0) { 1667 PetscScalar *v = vals + cumnnz; 1668 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 1669 } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1670 1671 /* Shift new starting point and sanity check */ 1672 cumnnz += mumps->nnz; 1673 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1674 1675 /* Free scratch memory */ 1676 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1677 PetscCall(PetscFree(mumps->val_alloc)); 1678 mumps->val = NULL; 1679 mumps->nnz = 0; 1680 } 1681 } 1682 if (mumps->id.ICNTL(15) == 1) { 1683 if (Mbs != A->rmap->N) { 1684 PetscMPIInt rank, size; 1685 1686 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1687 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1688 if (rank == 0) { 1689 PetscInt shift = 0; 1690 1691 PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 1692 PetscCall(PetscFree(mumps->id.blkptr)); 1693 PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 1694 mumps->id.blkptr[0] = 1; 1695 for (PetscInt i = 0; i < size; ++i) { 1696 for (PetscInt r = 0; r < nr; r++) { 1697 Mat sub = mats[r][r]; 1698 const PetscInt *ranges; 1699 PetscInt bs; 1700 1701 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1702 PetscCall(MatGetOwnershipRanges(sub, &ranges)); 1703 PetscCall(MatGetBlockSize(sub, &bs)); 1704 for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 1705 shift += (ranges[i + 1] - ranges[i]) / bs; 1706 } 1707 } 1708 } 1709 } else mumps->id.ICNTL(15) = 0; 1710 } 1711 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1712 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1713 for 
(PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1714 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1715 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1716 mumps->nest_vals_start[nr * nc] = cumnnz; 1717 1718 /* Set pointers for final MUMPS data structure */ 1719 mumps->nest_vals = vals; 1720 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1721 mumps->val = vals; 1722 mumps->irn = irns; 1723 mumps->jcn = jcns; 1724 mumps->nnz = cumnnz; 1725 } else { 1726 PetscScalar *oval = mumps->nest_vals; 1727 for (PetscInt r = 0; r < nr; r++) { 1728 for (PetscInt c = 0; c < nc; c++) { 1729 PetscBool conjugate = PETSC_FALSE; 1730 Mat sub = mats[r][c]; 1731 PetscScalar vscale = 1.0, vshift = 0.0; 1732 PetscInt midx = r * nc + c; 1733 1734 if (!mumps->nest_convert_to_triples[midx]) continue; 1735 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL)); 1736 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1737 mumps->val = oval + mumps->nest_vals_start[midx]; 1738 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1739 if (conjugate) { 1740 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1741 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]); 1742 } else if (vscale != 1.0) { 1743 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1744 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale; 1745 } 1746 } 1747 } 1748 mumps->val = oval; 1749 } 1750 PetscFunctionReturn(PETSC_SUCCESS); 1751 } 1752 1753 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1754 { 1755 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1756 1757 PetscFunctionBegin; 1758 PetscCall(PetscFree(mumps->id.isol_loc)); 1759 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1760 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1761 PetscCall(VecDestroy(&mumps->b_seq)); 1762 PetscCall(VecDestroy(&mumps->x_seq)); 1763 PetscCall(PetscFree(mumps->id.perm_in)); 1764 PetscCall(PetscFree(mumps->id.blkvar)); 1765 PetscCall(PetscFree(mumps->id.blkptr)); 1766 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1767 PetscCall(PetscFree(mumps->val_alloc)); 1768 PetscCall(PetscFree(mumps->info)); 1769 PetscCall(PetscFree(mumps->ICNTL_pre)); 1770 PetscCall(PetscFree(mumps->CNTL_pre)); 1771 PetscCall(MatMumpsResetSchur_Private(mumps)); 1772 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1773 mumps->id.job = JOB_END; 1774 PetscMUMPS_c(mumps); 1775 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1776 if (mumps->mumps_comm != MPI_COMM_NULL) { 1777 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1778 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1779 } 1780 } 1781 PetscCall(MatMumpsFreeInternalID(&mumps->id)); 1782 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1783 if (mumps->use_petsc_omp_support) { 1784 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1785 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1786 
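/* rhs_nrow[], rhs_recvcounts[] and rhs_disps[] below are the MPI_Gatherv metadata lazily allocated in MatMumpsSetUpDistRHSInfo() */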
PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1787 } 1788 #endif 1789 PetscCall(PetscFree(mumps->ia_alloc)); 1790 PetscCall(PetscFree(mumps->ja_alloc)); 1791 PetscCall(PetscFree(mumps->recvcount)); 1792 PetscCall(PetscFree(mumps->reqs)); 1793 PetscCall(PetscFree(mumps->irhs_loc)); 1794 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1795 PetscCall(PetscFree(mumps->nest_vals)); 1796 PetscCall(PetscFree(A->data)); 1797 1798 /* clear composed functions */ 1799 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1800 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1801 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1802 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1803 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1804 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1805 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1806 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1807 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1808 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1809 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1810 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1811 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1812 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1813 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL)); 1814 PetscFunctionReturn(PETSC_SUCCESS); 1815 } 1816 1817 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
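On return, id.nrhs, id.nloc_rhs, id.lrhs_loc, id.irhs_loc (1-based global row indices of the locally held RHS rows) and id.rhs_loc are set; with -mat_mumps_use_omp_threads the local pieces are first gathered onto the OMP master rank.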
*/ 1818 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1819 { 1820 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1821 const PetscMPIInt ompsize = mumps->omp_comm_size; 1822 PetscInt i, m, M, rstart; 1823 1824 PetscFunctionBegin; 1825 PetscCall(MatGetSize(A, &M, NULL)); 1826 PetscCall(MatGetLocalSize(A, &m, NULL)); 1827 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1828 if (ompsize == 1) { 1829 if (!mumps->irhs_loc) { 1830 mumps->nloc_rhs = (PetscMUMPSInt)m; 1831 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1832 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1833 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */ 1834 } 1835 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1836 } else { 1837 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1838 const PetscInt *ranges; 1839 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1840 MPI_Group petsc_group, omp_group; 1841 PetscScalar *recvbuf = NULL; 1842 1843 if (mumps->is_omp_master) { 1844 /* Lazily initialize the omp stuff for distributed rhs */ 1845 if (!mumps->irhs_loc) { 1846 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1847 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1848 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1849 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1850 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1851 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1852 1853 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1854 mumps->nloc_rhs = 0; 1855 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1856 for (j = 0; j < ompsize; j++) { 1857 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1858 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1859 } 1860 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1861 for (j = k = 0; j < ompsize; j++) { 1862 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */ 1863 } 1864 1865 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1866 PetscCallMPI(MPI_Group_free(&petsc_group)); 1867 PetscCallMPI(MPI_Group_free(&omp_group)); 1868 } 1869 1870 /* Realloc buffers when current nrhs is bigger than what we have met */ 1871 if (nrhs > mumps->max_nrhs) { 1872 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1873 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1874 mumps->max_nrhs = nrhs; 1875 } 1876 1877 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1878 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1879 mumps->rhs_disps[0] = 0; 1880 for (j = 1; j < ompsize; j++) { 1881 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1882 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1883 } 1884 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
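This skips the copy from rhs_recvbuf[] that is otherwise needed to interleave multiple RHS columns.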
Single rhs is common in Ax=b */ 1885 } 1886 1887 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1888 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1889 1890 if (mumps->is_omp_master) { 1891 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1892 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1893 for (j = 0; j < ompsize; j++) { 1894 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1895 dst = dstbase; 1896 for (i = 0; i < nrhs; i++) { 1897 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1898 src += mumps->rhs_nrow[j]; 1899 dst += mumps->nloc_rhs; 1900 } 1901 dstbase += mumps->rhs_nrow[j]; 1902 } 1903 } 1904 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1905 } 1906 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1907 } 1908 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1909 mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 1910 mumps->id.lrhs_loc = mumps->nloc_rhs; 1911 mumps->id.irhs_loc = mumps->irhs_loc; 1912 PetscFunctionReturn(PETSC_SUCCESS); 1913 } 1914 1915 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1916 { 1917 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1918 const PetscScalar *barray = NULL; 1919 PetscScalar *array; 1920 IS is_iden, is_petsc; 1921 PetscInt i; 1922 PetscBool second_solve = PETSC_FALSE; 1923 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1924 1925 PetscFunctionBegin; 1926 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1927 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1928 &cite1)); 1929 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1930 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1931 &cite2)); 1932 1933 PetscCall(VecFlag(x, A->factorerrortype)); 1934 if (A->factorerrortype) { 1935 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1936 PetscFunctionReturn(PETSC_SUCCESS); 1937 } 1938 1939 mumps->id.nrhs = 1; 1940 if (mumps->petsc_size > 1) { 1941 if (mumps->ICNTL20 == 10) { 1942 mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */ 1943 PetscCall(VecGetArrayRead(b, &barray)); 1944 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray)); 1945 } else { 1946 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector */ 1947 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1948 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1949 if (!mumps->myid) { 1950 PetscCall(VecGetArray(mumps->b_seq, &array)); 1951 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1952 } 1953 } 1954 } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */ 1955 mumps->id.ICNTL(20) = 0; 1956 PetscCall(VecCopy(b, x)); 1957 PetscCall(VecGetArray(x, &array)); 1958 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1959 } 1960 1961 /* 1962 handle condensation step of Schur complement (if any) 1963 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1964 According to MUMPS (5.0.0) manual, any value should be harmless during the factorization phase 1965 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
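(in that case the condensation solve, the Schur solve, and the expansion solve are all performed internally by this routine).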
1966 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1967 */ 1968 if (mumps->id.size_schur > 0) { 1969 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1970 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1971 second_solve = PETSC_TRUE; 1972 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 1973 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1974 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1975 } 1976 1977 mumps->id.job = JOB_SOLVE; 1978 PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs 1979 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1980 1981 /* handle expansion step of Schur complement (if any) */ 1982 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1983 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 1984 PetscCall(MatMumpsSolveSchur_Private(A)); 1985 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i); 1986 } 1987 1988 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1989 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1990 /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), recreate scat_sol */ 1991 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1992 } 1993 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1994 PetscInt *isol2_loc = NULL; 1995 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1996 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1997 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1998 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1999 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 2000 PetscCall(ISDestroy(&is_iden)); 2001 PetscCall(ISDestroy(&is_petsc)); 2002 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 2003 } 2004 2005 PetscScalar *xarray; 2006 PetscCall(VecGetArray(mumps->x_seq, &xarray)); 2007 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray)); 2008 PetscCall(VecRestoreArray(mumps->x_seq, &xarray)); 2009 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2010 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2011 2012 if (mumps->ICNTL20 == 10) { // distributed RHS 2013 PetscCall(VecRestoreArrayRead(b, &barray)); 2014 } else if (!mumps->myid) { // centralized RHS 2015 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 2016 } 2017 } else { 2018 // id.rhs has the solution in mumps precision 2019 PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array)); 2020 PetscCall(VecRestoreArray(x, &array)); 2021 } 2022 2023 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ?
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2024 PetscFunctionReturn(PETSC_SUCCESS); 2025 } 2026 2027 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 2028 { 2029 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2030 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2031 2032 PetscFunctionBegin; 2033 mumps->id.ICNTL(9) = 0; 2034 PetscCall(MatSolve_MUMPS(A, b, x)); 2035 mumps->id.ICNTL(9) = value; 2036 PetscFunctionReturn(PETSC_SUCCESS); 2037 } 2038 2039 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 2040 { 2041 Mat Bt = NULL; 2042 PetscBool denseX, denseB, flg, flgT; 2043 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2044 PetscInt i, nrhs, M, nrhsM; 2045 PetscScalar *array; 2046 const PetscScalar *barray; 2047 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 2048 PetscMUMPSInt *isol_loc, *isol_loc_save; 2049 PetscScalar *sol_loc; 2050 void *sol_loc_save; 2051 PetscCount sol_loc_len_save; 2052 IS is_to, is_from; 2053 PetscInt k, proc, j, m, myrstart; 2054 const PetscInt *rstart; 2055 Vec v_mpi, msol_loc; 2056 VecScatter scat_sol; 2057 Vec b_seq; 2058 VecScatter scat_rhs; 2059 PetscScalar *aa; 2060 PetscInt spnr, *ia, *ja; 2061 Mat_MPIAIJ *b = NULL; 2062 2063 PetscFunctionBegin; 2064 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 2065 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 2066 2067 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 2068 2069 if (denseB) { 2070 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 2071 mumps->id.ICNTL(20) = 0; /* dense RHS */ 2072 } else { /* sparse B */ 2073 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 2074 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 2075 PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 2076 PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 2077 /* input B is transpose of actual RHS matrix, 2078 because mumps requires sparse compressed COLUMN storage! 
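Passing the MATAIJ (CSR) representation of B = RHS^T hands MUMPS the CSC representation of RHS, so no explicit transposition is needed.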
See MatMatTransposeSolve_MUMPS() */ 2079 PetscCall(MatTransposeGetMat(B, &Bt)); 2080 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 2081 } 2082 2083 PetscCall(MatGetSize(B, &M, &nrhs)); 2084 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 2085 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 2086 mumps->id.lrhs = (PetscMUMPSInt)M; 2087 2088 if (mumps->petsc_size == 1) { // handle this easy case specially and return early 2089 PetscScalar *aa; 2090 PetscInt spnr, *ia, *ja; 2091 PetscBool second_solve = PETSC_FALSE; 2092 2093 PetscCall(MatDenseGetArray(X, &array)); 2094 if (denseB) { 2095 /* copy B to X */ 2096 PetscCall(MatDenseGetArrayRead(B, &barray)); 2097 PetscCall(PetscArraycpy(array, barray, nrhsM)); 2098 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2099 } else { /* sparse B */ 2100 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 2101 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2102 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2103 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2104 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2105 } 2106 PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2107 2108 /* handle condensation step of Schur complement (if any) */ 2109 if (mumps->id.size_schur > 0) { 2110 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 2111 second_solve = PETSC_TRUE; 2112 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 2113 mumps->id.ICNTL(26) = 1; /* condensation phase, i.e, to solve id.redrhs */ 2114 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 2115 } 2116 2117 mumps->id.job = JOB_SOLVE; 2118 PetscMUMPS_c(mumps); 2119 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2120 2121 /* handle expansion step of Schur complement (if any) */ 2122 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 2123 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 2124 PetscCall(MatMumpsSolveSchur_Private(A)); 2125 for (j = 0; j < nrhs; ++j) 2126 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs); 2127 } 2128 2129 if (!denseB) { /* sparse B, restore ia, ja */ 2130 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 2131 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2132 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2133 } 2134 2135 // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X. 2136 PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array)); 2137 PetscCall(MatDenseRestoreArray(X, &array)); 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
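(unless the dense distributed-RHS path ICNTL(20)=10 below is taken, in which case the RHS stays distributed via rhs_loc[]/irhs_loc[])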
*/ 2142 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 2143 2144 /* create msol_loc to hold mumps local solution */ 2145 isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */ 2146 sol_loc_save = mumps->id.sol_loc; 2147 sol_loc_len_save = mumps->id.sol_loc_len; 2148 mumps->id.isol_loc = NULL; // an init state 2149 mumps->id.sol_loc = NULL; 2150 mumps->id.sol_loc_len = 0; 2151 2152 lsol_loc = mumps->id.lsol_loc; 2153 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 2154 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 2155 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2156 mumps->id.isol_loc = isol_loc; 2157 2158 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 2159 2160 if (denseB) { 2161 if (mumps->ICNTL20 == 10) { 2162 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 2163 PetscCall(MatDenseGetArrayRead(B, &barray)); 2164 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc 2165 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2166 PetscCall(MatGetLocalSize(B, &m, NULL)); 2167 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X 2168 } else { 2169 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 2170 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 2171 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 2172 0, re-arrange B into desired order, which is a local operation. 2173 */ 2174 2175 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 2176 /* wrap dense rhs matrix B into a vector v_mpi */ 2177 PetscCall(MatGetLocalSize(B, &m, NULL)); 2178 PetscCall(MatDenseGetArrayRead(B, &barray)); 2179 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2180 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2181 2182 /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! 
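On rank 0, b_seq then holds the nrhs columns back to back (column j occupies entries j*M .. j*M+M-1, ordered by global row index); the idx[] map built below realizes this ordering.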
*/ 2183 if (!mumps->myid) { 2184 PetscInt *idx; 2185 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 2186 PetscCall(PetscMalloc1(nrhsM, &idx)); 2187 PetscCall(MatGetOwnershipRanges(B, &rstart)); 2188 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 2189 for (j = 0; j < nrhs; j++) { 2190 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 2191 } 2192 } 2193 2194 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 2195 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 2196 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 2197 } else { 2198 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 2199 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 2200 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 2201 } 2202 2203 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 2204 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2205 PetscCall(ISDestroy(&is_to)); 2206 PetscCall(ISDestroy(&is_from)); 2207 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2208 2209 if (!mumps->myid) { /* define rhs on the host */ 2210 PetscCall(VecGetArrayRead(b_seq, &barray)); 2211 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2212 PetscCall(VecRestoreArrayRead(b_seq, &barray)); 2213 } 2214 } 2215 } else { /* sparse B */ 2216 b = (Mat_MPIAIJ *)Bt->data; 2217 2218 /* wrap dense X into a vector v_mpi */ 2219 PetscCall(MatGetLocalSize(X, &m, NULL)); 2220 PetscCall(MatDenseGetArrayRead(X, &barray)); 2221 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2222 PetscCall(MatDenseRestoreArrayRead(X, &barray)); 2223 2224 if (!mumps->myid) { 2225 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 2226 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2227 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2228 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2229 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2230 } else { 2231 mumps->id.irhs_ptr = NULL; 2232 mumps->id.irhs_sparse = NULL; 2233 mumps->id.nz_rhs = 0; 2234 if (mumps->id.rhs_sparse_len) { 2235 PetscCall(PetscFree(mumps->id.rhs_sparse)); 2236 mumps->id.rhs_sparse_len = 0; 2237 } 2238 } 2239 } 2240 2241 /* solve phase */ 2242 mumps->id.job = JOB_SOLVE; 2243 PetscMUMPS_c(mumps); 2244 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2245 2246 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 2247 PetscCall(MatDenseGetArray(X, &array)); 2248 PetscCall(VecPlaceArray(v_mpi, array)); 2249 2250 /* create scatter scat_sol */ 2251 PetscCall(MatGetOwnershipRanges(X, &rstart)); 2252 /* iidx: index for scatter mumps solution to PETSc X */ 2253 2254 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 2255 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 2256 for (i = 0; i < lsol_loc; i++) { 2257 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 2258 2259 for (proc = 0; proc < mumps->petsc_size; proc++) { 2260 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 2261 myrstart = rstart[proc]; 2262 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 2263 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 2264 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 2265 break; 2266 } 2267 } 2268 2269 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 2270 } 2271 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 2272 PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[] 2273 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 2274 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2275 PetscCall(ISDestroy(&is_from)); 2276 PetscCall(ISDestroy(&is_to)); 2277 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2278 PetscCall(MatDenseRestoreArray(X, &array)); 2279 2280 if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers 2281 mumps->id.sol_loc_len = 0; 2282 PetscCall(PetscFree(mumps->id.sol_loc)); 2283 } 2284 2285 // restore old values 2286 mumps->id.sol_loc = sol_loc_save; 2287 mumps->id.sol_loc_len = sol_loc_len_save; 2288 mumps->id.isol_loc = isol_loc_save; 2289 2290 PetscCall(PetscFree2(sol_loc, isol_loc)); 2291 PetscCall(PetscFree(idxx)); 2292 PetscCall(VecDestroy(&msol_loc)); 2293 PetscCall(VecDestroy(&v_mpi)); 2294 if (!denseB) { 2295 if (!mumps->myid) { 2296 b = (Mat_MPIAIJ *)Bt->data; 2297 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 2298 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2299 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2300 } 2301 } else { 2302 if (mumps->ICNTL20 == 0) { 2303 PetscCall(VecDestroy(&b_seq)); 2304 PetscCall(VecScatterDestroy(&scat_rhs)); 2305 } 2306 } 2307 PetscCall(VecScatterDestroy(&scat_sol)); 2308 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2309 PetscFunctionReturn(PETSC_SUCCESS); 2310 } 2311 2312 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 2313 { 2314 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2315 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2316 2317 PetscFunctionBegin; 2318 mumps->id.ICNTL(9) = 0; 2319 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2320 mumps->id.ICNTL(9) = value; 2321 PetscFunctionReturn(PETSC_SUCCESS); 2322 } 2323 2324 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 2325 { 2326 PetscBool flg; 2327 Mat B; 2328 2329 PetscFunctionBegin; 2330 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2331 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 2332 2333 /* Create B=Bt^T that uses Bt's data structure */ 2334 PetscCall(MatCreateTranspose(Bt, &B)); 2335 2336 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2337 PetscCall(MatDestroy(&B)); 2338 PetscFunctionReturn(PETSC_SUCCESS); 2339 } 2340 2341 #if !defined(PETSC_USE_COMPLEX) 2342 /* 2343 input: 2344 F: numeric factor 2345 output: 2346 nneg: total number of negative pivots 2347 nzero: total number of zero pivots 2348 npos: (global dimension of F) - nneg - nzero 2349 */ 2350 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 2351 { 2352 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2353 PetscMPIInt size; 2354 2355 PetscFunctionBegin; 2356 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 2357 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 2358 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 2359 2360 if (nneg) *nneg = mumps->id.INFOG(12); 2361 if (nzero || npos) { 2362 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2363 if (nzero) *nzero = mumps->id.INFOG(28); 2364 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 2365 } 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 #endif 2369 2370 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 2371 { 2372 PetscMPIInt nreqs; 2373 PetscMUMPSInt *irn, *jcn; 2374 PetscMPIInt count; 2375 PetscCount totnnz, remain; 2376 const PetscInt osize = mumps->omp_comm_size; 2377 PetscScalar *val; 2378 2379 PetscFunctionBegin; 2380 if (osize > 1) { 2381 if (reuse == MAT_INITIAL_MATRIX) { 2382 /* master first gathers counts of nonzeros to receive */ 2383 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 2384 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 2385 2386 /* Then each computes number of send/recvs */ 2387 if (mumps->is_omp_master) { 2388 /* Start from 1 since self communication is not done in MPI */ 2389 nreqs = 0; 2390 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 2391 } else { 2392 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 2393 } 2394 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 2395 2396 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 2397 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 2398 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 2399 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
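For example, a sender whose nnz slightly exceeds 2^31-1 posts two slices: one of PETSC_MPI_INT_MAX entries and one carrying the remainder.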
2400 */ 2401 nreqs = 0; /* counter for actual send/recvs */ 2402 if (mumps->is_omp_master) { 2403 totnnz = 0; 2404 2405 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 2406 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 2407 PetscCall(PetscMalloc1(totnnz, &val)); 2408 2409 /* Self communication */ 2410 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 2411 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 2412 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 2413 2414 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 2415 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 2416 PetscCall(PetscFree(mumps->val_alloc)); 2417 mumps->nnz = totnnz; 2418 mumps->irn = irn; 2419 mumps->jcn = jcn; 2420 mumps->val = mumps->val_alloc = val; 2421 2422 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 2423 jcn += mumps->recvcount[0]; 2424 val += mumps->recvcount[0]; 2425 2426 /* Remote communication */ 2427 for (PetscMPIInt i = 1; i < osize; i++) { 2428 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2429 remain = mumps->recvcount[i] - count; 2430 while (count > 0) { 2431 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2432 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2433 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2434 irn += count; 2435 jcn += count; 2436 val += count; 2437 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2438 remain -= count; 2439 } 2440 } 2441 } else { 2442 irn = mumps->irn; 2443 jcn = mumps->jcn; 2444 val = mumps->val; 2445 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2446 remain = mumps->nnz - count; 2447 while (count > 0) { 2448 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2449 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2450 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2451 irn += count; 2452 jcn += count; 2453 val += count; 2454 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2455 remain -= count; 2456 } 2457 } 2458 } else { 2459 nreqs = 0; 2460 if (mumps->is_omp_master) { 2461 val = mumps->val + mumps->recvcount[0]; 2462 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 2463 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2464 remain = mumps->recvcount[i] - count; 2465 while (count > 0) { 2466 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2467 val += count; 2468 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2469 remain -= count; 2470 } 2471 } 2472 } else { 2473 val = mumps->val; 2474 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2475 remain = mumps->nnz - count; 2476 while (count > 0) { 2477 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2478 val += count; 2479 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2480 remain -= count; 2481 } 2482 } 2483 } 2484 
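/* Wait for all sliced messages posted above (up to three per slice: irn, jcn and val) before the gathered triples are used */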
PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2485 mumps->tag++; /* It is totally fine for the above send/recvs to share one MPI tag */ 2486 } 2487 PetscFunctionReturn(PETSC_SUCCESS); 2488 } 2489 2490 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2491 { 2492 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2493 2494 PetscFunctionBegin; 2495 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { 2496 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2497 PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2498 PetscFunctionReturn(PETSC_SUCCESS); 2499 } 2500 2501 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2502 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2503 2504 /* numerical factorization phase */ 2505 mumps->id.job = JOB_FACTNUMERIC; 2506 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2507 if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2508 } else { 2509 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2510 } 2511 2512 if (F->schur) { 2513 const PetscScalar *array; 2514 MUMPS_INT size = mumps->id.size_schur; 2515 PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2516 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2517 PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2518 } 2519 2520 PetscMUMPS_c(mumps); 2521 if (mumps->id.INFOG(1) < 0) { 2522 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2523 if (mumps->id.INFOG(1) == -10) { 2524 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2525 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2526 } else if (mumps->id.INFOG(1) == -13) { 2527 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2528 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2529 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2530 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2531 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2532 } else { 2533 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2534 F->factorerrortype = MAT_FACTOR_OTHER; 2535 } 2536 } 2537 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16)); 2538 2539 F->assembled = PETSC_TRUE; 2540 2541 if (F->schur) { /* reset Schur status to unfactored */ 2542 #if defined(PETSC_HAVE_CUDA) 2543 F->schur->offloadmask =
PETSC_OFFLOAD_CPU; 2544 #endif 2545 PetscScalar *array; 2546 PetscCall(MatDenseGetArray(F->schur, &array)); 2547 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2548 PetscCall(MatDenseRestoreArray(F->schur, &array)); 2549 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2550 mumps->id.ICNTL(19) = 2; 2551 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2552 } 2553 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2554 } 2555 2556 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2557 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2558 2559 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2560 // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2561 // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2562 // which the solution (in SOL_loc) is available on the local processor. 2563 // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2564 // normally have the same contents for each of these calls. The only exception is the case of 2565 // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2566 // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 2567 if (mumps->petsc_size > 1) { 2568 PetscInt lsol_loc; 2569 PetscScalar *array; 2570 2571 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2572 if (mumps->x_seq) { 2573 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2574 PetscCall(PetscFree(mumps->id.isol_loc)); 2575 PetscCall(VecDestroy(&mumps->x_seq)); 2576 } 2577 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2578 PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2579 PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2580 PetscCall(VecGetArray(mumps->x_seq, &array)); 2581 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2582 PetscCall(VecRestoreArray(mumps->x_seq, &array)); 2583 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2584 } 2585 PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 2586 PetscFunctionReturn(PETSC_SUCCESS); 2587 } 2588 2589 /* Sets MUMPS options from the options database */ 2590 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2591 { 2592 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2593 PetscReal cntl; 2594 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2595 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2596 PetscBool flg = PETSC_FALSE; 2597 PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2598 void *arr; 2599 2600 PetscFunctionBegin; 2601 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2602 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2603 PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2604 PetscInt nthreads = 0; 2605 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2606 PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2607 PetscMUMPSInt nblk, *blkvar, *blkptr; 2608 2609 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2610 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2611 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2612 2613 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2614 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */ 2615 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2616 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2617 if (mumps->use_petsc_omp_support) { 2618 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2619 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2620 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2621 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2622 #else 2623 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2624 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2625 #endif 2626 } else { 2627 mumps->omp_comm = PETSC_COMM_SELF; 2628 mumps->mumps_comm = mumps->petsc_comm; 2629 mumps->is_omp_master = PETSC_TRUE; 2630 } 2631 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2632 mumps->reqs = NULL; 2633 mumps->tag = 0; 2634 2635 if (mumps->mumps_comm != MPI_COMM_NULL) { 2636 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2637 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2638 MPI_Comm comm; 2639 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2640 mumps->mumps_comm = comm; 2641 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2642 } 2643 2644 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2645 mumps->id.job = JOB_INIT; 2646 mumps->id.par = 1; /* host participates in factorization and solve */ 2647 mumps->id.sym = mumps->sym; 2648 2649 size = mumps->id.size_schur; 2650 arr = mumps->id.schur; 2651 listvar_schur = mumps->id.listvar_schur; 2652 nblk = mumps->id.nblk; 2653 blkvar = mumps->id.blkvar; 2654 blkptr = mumps->id.blkptr; 2655 if (PetscDefined(USE_DEBUG)) { 2656 for (PetscInt i = 0; i < size; i++) 2657 PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "!
%" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 2658 A->rmap->N); 2659 } 2660 2661 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2662 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2663 PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed for your specified PetscScalar"); 2664 PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2665 2666 PetscMUMPS_c(mumps); 2667 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2668 2669 /* set PETSc-MUMPS default options - override MUMPS default */ 2670 mumps->id.ICNTL(3) = 0; 2671 mumps->id.ICNTL(4) = 0; 2672 if (mumps->petsc_size == 1) { 2673 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2674 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2675 } else { 2676 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2677 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2678 } 2679 if (nblk && blkptr) { 2680 mumps->id.ICNTL(15) = 1; 2681 mumps->id.nblk = nblk; 2682 mumps->id.blkvar = blkvar; 2683 mumps->id.blkptr = blkptr; 2684 } else mumps->id.ICNTL(15) = 0; 2685 2686 /* restore cached ICNTL and CNTL values */ 2687 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2688 for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2689 2690 PetscCall(PetscFree(mumps->ICNTL_pre)); 2691 PetscCall(PetscFree(mumps->CNTL_pre)); 2692 2693 if (schur) { 2694 mumps->id.size_schur = size; 2695 mumps->id.schur_lld = size; 2696 mumps->id.schur = arr; 2697 mumps->id.listvar_schur = listvar_schur; 2698 if (mumps->petsc_size > 1) { 2699 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2700 2701 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2702 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2703 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2704 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported by PETSc"); 2705 } else { 2706 if (F->factortype == MAT_FACTOR_LU) { 2707 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2708 } else { 2709 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2710 } 2711 } 2712 mumps->id.ICNTL(26) = -1; 2713 } 2714 2715 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2716 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
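The MPI_Bcast calls below perform this copy over omp_comm; when -mat_mumps_use_omp_threads is not used, omp_comm is PETSC_COMM_SELF and the broadcasts are trivially local.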
2717 */ 2718 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2719 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2720 2721 mumps->scat_rhs = NULL; 2722 mumps->scat_sol = NULL; 2723 } 2724 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2725 if (flg) mumps->id.ICNTL(1) = icntl; 2726 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2727 if (flg) mumps->id.ICNTL(2) = icntl; 2728 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2729 if (flg) mumps->id.ICNTL(3) = icntl; 2730 2731 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2732 if (flg) mumps->id.ICNTL(4) = icntl; 2733 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2734 2735 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2736 if (flg) mumps->id.ICNTL(6) = icntl; 2737 2738 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2739 if (flg) { 2740 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2741 mumps->id.ICNTL(7) = icntl; 2742 } 2743 2744 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2745 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2746 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2747 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2748 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2749 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2750 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2751 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2752 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2753 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2754 if (flg) { 2755 if (mumps->id.ICNTL(15) < 0) 
PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2756 else if (mumps->id.ICNTL(15) > 0) { 2757 const PetscInt *bsizes; 2758 PetscInt nblocks, p, *blkptr = NULL; 2759 PetscMPIInt *recvcounts, *displs, n; 2760 PetscMPIInt rank, size = 0; 2761 2762 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2763 flg = PETSC_TRUE; 2764 for (p = 0; p < nblocks; ++p) { 2765 if (bsizes[p] > 1) break; 2766 } 2767 if (p == nblocks) flg = PETSC_FALSE; 2768 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2769 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2770 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2771 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2772 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2773 PetscCall(PetscMPIIntCast(nblocks, &n)); 2774 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2775 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2776 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2777 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2778 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2779 if (rank == 0) { 2780 blkptr[0] = 1; 2781 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2782 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2783 } 2784 PetscCall(PetscFree2(recvcounts, displs)); 2785 PetscCall(PetscFree(blkptr)); 2786 } 2787 } 2788 } 2789 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2790 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2791 PetscCall(MatDestroy(&F->schur)); 2792 PetscCall(MatMumpsResetSchur_Private(mumps)); 2793 } 2794 2795 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2796 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2797 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2798 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2799 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 
2800 In short, we could not use distributed RHS unless we had MPICH v4.0b1 or newer, or enabled a workaround available in MUMPS 5.6.2+ 2801 */ 2802 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2803 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2804 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2805 #endif 2806 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2807 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2808 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2809 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2810 #endif 2811 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2812 2813 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2814 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2815 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2816 if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ 2817 2818 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2819 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase when a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2820 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2821 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2822 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2823 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2824 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2825 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported
by PETSc API */ 2826 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2827 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2828 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2829 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2830 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2831 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2832 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2833 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2834 2835 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2836 if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2837 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2838 if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2839 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2840 if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2841 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2842 if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2843 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2844 if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2845 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2846 if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2847 2848 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2849 2850 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2851 if (ninfo) { 2852 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo); 2853 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2854 mumps->ninfo = ninfo; 2855 for (i = 0; i < ninfo; i++) { 2856 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]); 2857 mumps->info[i] = info[i]; 2858 } 2859 } 2860 PetscOptionsEnd(); 2861 PetscFunctionReturn(PETSC_SUCCESS); 2862 } 2863 2864 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo
*info, Mat_MUMPS *mumps) 2865 { 2866 PetscFunctionBegin; 2867 if (mumps->id.INFOG(1) < 0) { 2868 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2869 if (mumps->id.INFOG(1) == -6) { 2870 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2871 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2872 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2873 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2874 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2875 } else { 2876 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2877 F->factorerrortype = MAT_FACTOR_OTHER; 2878 } 2879 } 2880 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2881 PetscFunctionReturn(PETSC_SUCCESS); 2882 } 2883 2884 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2885 { 2886 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2887 Vec b; 2888 const PetscInt M = A->rmap->N; 2889 2890 PetscFunctionBegin; 2891 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2892 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2893 PetscFunctionReturn(PETSC_SUCCESS); 2894 } 2895 2896 /* Set MUMPS options from the options database */ 2897 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2898 2899 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2900 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2901 2902 /* analysis phase */ 2903 mumps->id.job = JOB_FACTSYMBOLIC; 2904 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2905 switch (mumps->id.ICNTL(18)) { 2906 case 0: /* centralized assembled matrix input */ 2907 if (!mumps->myid) { 2908 mumps->id.nnz = mumps->nnz; 2909 mumps->id.irn = mumps->irn; 2910 mumps->id.jcn = mumps->jcn; 2911 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2912 if (r && mumps->id.ICNTL(7) == 7) { 2913 mumps->id.ICNTL(7) = 1; 2914 if (!mumps->myid) { 2915 const PetscInt *idx; 2916 PetscInt i; 2917 2918 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2919 PetscCall(ISGetIndices(r, &idx)); 2920 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2921 PetscCall(ISRestoreIndices(r, &idx)); 2922 } 2923 } 2924 } 2925 break; 2926 case 3: /* distributed assembled matrix input (size>1) */ 2927 mumps->id.nnz_loc = mumps->nnz; 2928 mumps->id.irn_loc = mumps->irn; 2929 mumps->id.jcn_loc = mumps->jcn; 2930 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2931 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
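With ICNTL(20)=0 MUMPS reads the dense right-hand side only on the host process, so the distributed vector entries must be gathered onto rank 0 before each solve.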
Create scatter scat_rhs for repeated use in MatSolve() */ 2932 PetscCall(MatCreateVecs(A, NULL, &b)); 2933 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2934 PetscCall(VecDestroy(&b)); 2935 } 2936 break; 2937 } 2938 PetscMUMPS_c(mumps); 2939 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2940 2941 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2942 F->ops->solve = MatSolve_MUMPS; 2943 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2944 F->ops->matsolve = MatMatSolve_MUMPS; 2945 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2946 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2947 2948 mumps->matstruc = SAME_NONZERO_PATTERN; 2949 PetscFunctionReturn(PETSC_SUCCESS); 2950 } 2951 2952 /* Note the PETSc r and c permutations are ignored */ 2953 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2954 { 2955 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2956 Vec b; 2957 const PetscInt M = A->rmap->N; 2958 2959 PetscFunctionBegin; 2960 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2961 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2962 PetscFunctionReturn(PETSC_SUCCESS); 2963 } 2964 2965 /* Set MUMPS options from the options database */ 2966 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2967 2968 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2969 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2970 2971 /* analysis phase */ 2972 mumps->id.job = JOB_FACTSYMBOLIC; 2973 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2974 switch (mumps->id.ICNTL(18)) { 2975 case 0: /* centralized assembled matrix input */ 2976 if (!mumps->myid) { 2977 mumps->id.nnz = mumps->nnz; 2978 mumps->id.irn = mumps->irn; 2979 mumps->id.jcn = mumps->jcn; 2980 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2981 } 2982 break; 2983 case 3: /* distributed assembled matrix input (size>1) */ 2984 mumps->id.nnz_loc = mumps->nnz; 2985 mumps->id.irn_loc = mumps->irn; 2986 mumps->id.jcn_loc = mumps->jcn; 2987 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2988 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2989 PetscCall(MatCreateVecs(A, NULL, &b)); 2990 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2991 PetscCall(VecDestroy(&b)); 2992 } 2993 break; 2994 } 2995 PetscMUMPS_c(mumps); 2996 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2997 2998 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2999 F->ops->solve = MatSolve_MUMPS; 3000 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 3001 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3002 3003 mumps->matstruc = SAME_NONZERO_PATTERN; 3004 PetscFunctionReturn(PETSC_SUCCESS); 3005 } 3006 3007 /* Note the PETSc r permutation and factor info are ignored */ 3008 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 3009 { 3010 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3011 Vec b; 3012 const PetscInt M = A->rmap->N; 3013 3014 PetscFunctionBegin; 3015 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 3016 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 3017 PetscFunctionReturn(PETSC_SUCCESS); 3018 } 3019 3020 /* Set MUMPS options from the options database */ 3021 PetscCall(MatSetFromOptions_MUMPS(F, A)); 3022 3023 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 3024 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 3025 3026 /* analysis phase */ 3027 mumps->id.job = JOB_FACTSYMBOLIC; 3028 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 3029 switch (mumps->id.ICNTL(18)) { 3030 case 0: /* centralized assembled matrix input */ 3031 if (!mumps->myid) { 3032 mumps->id.nnz = mumps->nnz; 3033 mumps->id.irn = mumps->irn; 3034 mumps->id.jcn = mumps->jcn; 3035 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 3036 } 3037 break; 3038 case 3: /* distributed assembled matrix input (size>1) */ 3039 mumps->id.nnz_loc = mumps->nnz; 3040 mumps->id.irn_loc = mumps->irn; 3041 mumps->id.jcn_loc = mumps->jcn; 3042 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 3043 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 3044 PetscCall(MatCreateVecs(A, NULL, &b)); 3045 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 3046 PetscCall(VecDestroy(&b)); 3047 } 3048 break; 3049 } 3050 PetscMUMPS_c(mumps); 3051 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3052 3053 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 3054 F->ops->solve = MatSolve_MUMPS; 3055 F->ops->solvetranspose = MatSolve_MUMPS; 3056 F->ops->matsolve = MatMatSolve_MUMPS; 3057 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 3058 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3059 #if defined(PETSC_USE_COMPLEX) 3060 F->ops->getinertia = NULL; 3061 #else 3062 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 3063 #endif 3064 3065 mumps->matstruc = SAME_NONZERO_PATTERN; 3066 PetscFunctionReturn(PETSC_SUCCESS); 3067 } 3068 3069 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 3070 { 3071 PetscBool isascii; 3072 PetscViewerFormat format; 3073 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3074 3075 PetscFunctionBegin; 3076 /* check if matrix is mumps type */ 3077 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 3078 3079 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 3080 if (isascii) { 3081 PetscCall(PetscViewerGetFormat(viewer, &format)); 3082 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3083 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 3084 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3085 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 3086 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 3087 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 3088 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 3089 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 3090 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 3091 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 3092 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 3093 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 3094 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 3095 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 3096 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 3097 if (mumps->id.ICNTL(11) > 0) { 3098 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4))); 3099 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5))); 3100 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6))); 3101 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8))); 3102 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error 
estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9))); 3103 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11))); 3104 } 3105 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 3106 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 3107 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 3108 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 3109 /* ICNTL(16), ICNTL(17) not used */ 3110 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 3111 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 3112 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 3113 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 3114 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 3115 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory that can be allocated locally):%d\n", mumps->id.ICNTL(23))); 3116 3117 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 3118 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 3119 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 3120 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 3121 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 3122 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 3123 3124 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 3125 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (which factors may be discarded during factorization): %d\n", mumps->id.ICNTL(31))); 3126 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 3127 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 3128 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 3129 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 3130 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 3131 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 3132 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 3133 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 3134 3135 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative
pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1))); 3136 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2))); 3137 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3))); 3138 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4))); 3139 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5))); 3140 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7))); 3141 3142 /* information local to each processor */ 3143 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 3144 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 3145 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1))); 3146 PetscCall(PetscViewerFlush(viewer)); 3147 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 3148 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2))); 3149 PetscCall(PetscViewerFlush(viewer)); 3150 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 3151 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3))); 3152 PetscCall(PetscViewerFlush(viewer)); 3153 3154 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 3155 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 3156 PetscCall(PetscViewerFlush(viewer)); 3157 3158 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 3159 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 3160 PetscCall(PetscViewerFlush(viewer)); 3161 3162 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 3163 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 3164 PetscCall(PetscViewerFlush(viewer)); 3165 3166 if (mumps->ninfo && mumps->ninfo <= 80) { 3167 PetscInt i; 3168 for (i = 0; i < mumps->ninfo; i++) { 3169 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 3170 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 3171 PetscCall(PetscViewerFlush(viewer)); 3172 } 3173 } 3174 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 3175 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 3176 3177 if (mumps->myid == 0) { /* information from the host */ 3178 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1))); 3179 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2))); 3180 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3))); 3181 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34))); 3182 3183 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 3184 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 3185 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 3186 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 3187 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 3188 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 3189 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 3190 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 3191 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 3192 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 3193 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 3194 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 3195 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 3196 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 3197 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 3198 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 3199 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 3200 PetscCall(PetscViewerASCIIPrintf(viewer, " 
INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 3201 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 3202 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 3203 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 3204 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 3205 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 3206 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 3207 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 3208 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 3209 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 3210 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 3211 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 3212 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 3213 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 3214 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 3215 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 3216 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 3217 } 3218 } 3219 } 3220 PetscFunctionReturn(PETSC_SUCCESS); 3221 } 3222 3223 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 3224 { 3225 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3226 3227 PetscFunctionBegin; 3228 info->block_size = 1.0; 3229 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3230 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3231 info->nz_unneeded = 0.0; 3232 info->assemblies = 0.0; 3233 info->mallocs = 0.0; 3234 info->memory = 0.0; 3235 info->fill_ratio_given = 0; 3236 info->fill_ratio_needed = 0; 3237 info->factor_mallocs = 0; 3238 PetscFunctionReturn(PETSC_SUCCESS); 3239 } 3240 3241 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 3242 { 3243 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3244 const PetscScalar *arr; 3245 const PetscInt *idxs; 3246 PetscInt size, i; 3247 3248 PetscFunctionBegin; 3249 PetscCall(ISGetLocalSize(is, &size)); 3250 /* Schur complement matrix */ 3251 PetscCall(MatDestroy(&F->schur)); 3252 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 3253 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 3254 // don't allocate mumps->id.schur[] now as its precision is not yet known 3255 PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 3256 PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 3257 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 3258 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 3259 3260 /* MUMPS expects Fortran style indices */ 3261 PetscCall(PetscFree(mumps->id.listvar_schur)); 3262 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 3263 PetscCall(ISGetIndices(is, &idxs)); 3264 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 3265 PetscCall(ISRestoreIndices(is, &idxs)); 3266 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 3267 if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; 3268 else mumps->ICNTL26 = -1; 3269 PetscFunctionReturn(PETSC_SUCCESS); 3270 } 3271 3272 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 3273 { 3274 Mat St; 3275 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3276 PetscScalar *array; 3277 PetscInt i, j, N = mumps->id.size_schur; 3278 3279 PetscFunctionBegin; 3280 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected!
Call MatFactorSetSchurIS() to enable it"); 3281 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 3282 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 3283 PetscCall(MatSetType(St, MATDENSE)); 3284 PetscCall(MatSetUp(St)); 3285 PetscCall(MatDenseGetArray(St, &array)); 3286 if (!mumps->sym) { /* MUMPS always return a full matrix */ 3287 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 3288 for (i = 0; i < N; i++) { 3289 for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3290 } 3291 } else { /* stored by columns */ 3292 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3293 } 3294 } else { /* either full or lower-triangular (not packed) */ 3295 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 3296 for (i = 0; i < N; i++) { 3297 for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3298 } 3299 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 3300 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3301 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 3302 for (i = 0; i < N; i++) { 3303 for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3304 } 3305 } 3306 } 3307 PetscCall(MatDenseRestoreArray(St, &array)); 3308 *S = St; 3309 PetscFunctionReturn(PETSC_SUCCESS); 3310 } 3311 3312 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 3313 { 3314 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3315 3316 PetscFunctionBegin; 3317 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 3318 PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 3319 for (i = 0; i < nICNTL_pre; ++i) 3320 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 3321 if (i == nICNTL_pre) { /* not already cached */ 3322 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 3323 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 3324 mumps->ICNTL_pre[0]++; 3325 } 3326 mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 3327 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 3328 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 3329 PetscFunctionReturn(PETSC_SUCCESS); 3330 } 3331 3332 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 3333 { 3334 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3335 3336 PetscFunctionBegin; 3337 if (mumps->id.job == JOB_NULL) { 3338 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 3339 *ival = 0; 3340 for (i = 0; i < nICNTL_pre; ++i) { 3341 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 3342 } 3343 } else *ival = mumps->id.ICNTL(icntl); 3344 PetscFunctionReturn(PETSC_SUCCESS); 3345 } 3346 3347 /*@ 3348 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3349 3350 Logically Collective 3351 3352 Input Parameters: 3353 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3354 . 
icntl - index of MUMPS parameter array `ICNTL()` 3355 - ival - value of MUMPS `ICNTL(icntl)` 3356 3357 Options Database Key: 3358 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 3359 3360 Level: beginner 3361 3362 Note: 3363 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3364 3365 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3366 @*/ 3367 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 3368 { 3369 PetscFunctionBegin; 3370 PetscValidType(F, 1); 3371 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3372 PetscValidLogicalCollectiveInt(F, icntl, 2); 3373 PetscValidLogicalCollectiveInt(F, ival, 3); 3374 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3375 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 3376 PetscFunctionReturn(PETSC_SUCCESS); 3377 } 3378 3379 /*@ 3380 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3381 3382 Logically Collective 3383 3384 Input Parameters: 3385 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3386 - icntl - index of MUMPS parameter array ICNTL() 3387 3388 Output Parameter: 3389 . ival - value of MUMPS ICNTL(icntl) 3390 3391 Level: beginner 3392 3393 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3394 @*/ 3395 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 3396 { 3397 PetscFunctionBegin; 3398 PetscValidType(F, 1); 3399 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3400 PetscValidLogicalCollectiveInt(F, icntl, 2); 3401 PetscAssertPointer(ival, 3); 3402 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3403 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3404 PetscFunctionReturn(PETSC_SUCCESS); 3405 } 3406 3407 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 3408 { 3409 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3410 3411 PetscFunctionBegin; 3412 if (mumps->id.job == JOB_NULL) { 3413 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 3414 for (i = 0; i < nCNTL_pre; ++i) 3415 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3416 if (i == nCNTL_pre) { 3417 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3418 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3419 mumps->CNTL_pre[0]++; 3420 } 3421 mumps->CNTL_pre[1 + 2 * i] = icntl; 3422 mumps->CNTL_pre[2 + 2 * i] = val; 3423 } else ID_CNTL_SET(mumps->id, icntl, val); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 3427 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3428 { 3429 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3430 3431 PetscFunctionBegin; 3432 if (mumps->id.job == JOB_NULL) { 3433 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3434 *val = 0.0; 3435 for (i = 0; i < nCNTL_pre; ++i) { 3436 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 3437 } 3438 } else *val = ID_CNTL_GET(mumps->id, icntl); 3439 PetscFunctionReturn(PETSC_SUCCESS); 3440 } 3441 3442 /*@ 3443 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3444 3445 Logically Collective 3446 3447 Input Parameters: 3448 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3449 . icntl - index of MUMPS parameter array `CNTL()` 3450 - val - value of MUMPS `CNTL(icntl)` 3451 3452 Options Database Key: 3453 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3454 3455 Level: beginner 3456 3457 Note: 3458 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3459 3460 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3461 @*/ 3462 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3463 { 3464 PetscFunctionBegin; 3465 PetscValidType(F, 1); 3466 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3467 PetscValidLogicalCollectiveInt(F, icntl, 2); 3468 PetscValidLogicalCollectiveReal(F, val, 3); 3469 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3470 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3471 PetscFunctionReturn(PETSC_SUCCESS); 3472 } 3473 3474 /*@ 3475 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3476 3477 Logically Collective 3478 3479 Input Parameters: 3480 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3481 - icntl - index of MUMPS parameter array CNTL() 3482 3483 Output Parameter: 3484 . 
val - value of MUMPS CNTL(icntl) 3485 3486 Level: beginner 3487 3488 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3489 @*/ 3490 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3491 { 3492 PetscFunctionBegin; 3493 PetscValidType(F, 1); 3494 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3495 PetscValidLogicalCollectiveInt(F, icntl, 2); 3496 PetscAssertPointer(val, 3); 3497 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3498 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3499 PetscFunctionReturn(PETSC_SUCCESS); 3500 } 3501 3502 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3503 { 3504 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3505 3506 PetscFunctionBegin; 3507 *info = mumps->id.INFO(icntl); 3508 PetscFunctionReturn(PETSC_SUCCESS); 3509 } 3510 3511 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3512 { 3513 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3514 3515 PetscFunctionBegin; 3516 *infog = mumps->id.INFOG(icntl); 3517 PetscFunctionReturn(PETSC_SUCCESS); 3518 } 3519 3520 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3521 { 3522 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3523 3524 PetscFunctionBegin; 3525 *rinfo = ID_RINFO_GET(mumps->id, icntl); 3526 PetscFunctionReturn(PETSC_SUCCESS); 3527 } 3528 3529 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3530 { 3531 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3532 3533 PetscFunctionBegin; 3534 *rinfog = ID_RINFOG_GET(mumps->id, icntl); 3535 PetscFunctionReturn(PETSC_SUCCESS); 3536 } 3537 3538 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3539 { 3540 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3541 3542 PetscFunctionBegin; 3543 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3544 *size = 0; 3545 *array = NULL; 3546 if (!mumps->myid) { 3547 *size = mumps->id.INFOG(28); 3548 PetscCall(PetscMalloc1(*size, array)); 3549 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3550 } 3551 PetscFunctionReturn(PETSC_SUCCESS); 3552 } 3553 3554 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3555 { 3556 Mat Bt = NULL, Btseq = NULL; 3557 PetscBool flg; 3558 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3559 PetscScalar *aa; 3560 PetscInt spnr, *ia, *ja, M, nrhs; 3561 3562 PetscFunctionBegin; 3563 PetscAssertPointer(spRHS, 2); 3564 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3565 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3566 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3567 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3568 3569 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3570 3571 if (mumps->petsc_size > 1) 
{ 3572 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3573 Btseq = b->A; 3574 } else { 3575 Btseq = Bt; 3576 } 3577 3578 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3579 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3580 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3581 mumps->id.rhs = NULL; 3582 3583 if (!mumps->myid) { 3584 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3585 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3586 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3587 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3588 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3589 } else { 3590 mumps->id.irhs_ptr = NULL; 3591 mumps->id.irhs_sparse = NULL; 3592 mumps->id.nz_rhs = 0; 3593 if (mumps->id.rhs_sparse_len) { 3594 PetscCall(PetscFree(mumps->id.rhs_sparse)); 3595 mumps->id.rhs_sparse_len = 0; 3596 } 3597 } 3598 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3599 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3600 3601 /* solve phase */ 3602 mumps->id.job = JOB_SOLVE; 3603 PetscMUMPS_c(mumps); 3604 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3605 3606 if (!mumps->myid) { 3607 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3608 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3609 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3610 } 3611 PetscFunctionReturn(PETSC_SUCCESS); 3612 } 3613 3614 /*@ 3615 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3616 3617 Logically Collective 3618 3619 Input Parameter: 3620 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3621 3622 Output Parameter: 3623 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3624 3625 Level: beginner 3626 3627 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3628 @*/ 3629 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3630 { 3631 PetscFunctionBegin; 3632 PetscValidType(F, 1); 3633 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3634 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3635 PetscFunctionReturn(PETSC_SUCCESS); 3636 } 3637 3638 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3639 { 3640 Mat spRHS; 3641 3642 PetscFunctionBegin; 3643 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3644 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3645 PetscCall(MatDestroy(&spRHS)); 3646 PetscFunctionReturn(PETSC_SUCCESS); 3647 } 3648 3649 /*@ 3650 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3651 3652 Logically Collective 3653 3654 Input Parameter: 3655 . 
F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3656 3657 Output Parameter: 3658 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3659 3660 Level: beginner 3661 3662 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3663 @*/ 3664 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3665 { 3666 PetscBool flg; 3667 3668 PetscFunctionBegin; 3669 PetscValidType(F, 1); 3670 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3671 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3672 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3673 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3674 PetscFunctionReturn(PETSC_SUCCESS); 3675 } 3676 3677 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3678 { 3679 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3680 3681 PetscFunctionBegin; 3682 if (nblk) { 3683 PetscAssertPointer(blkptr, 4); 3684 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3685 PetscCall(PetscFree(mumps->id.blkptr)); 3686 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3687 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3688 // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3689 // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 3690 // mumps->id.ICNTL(15) = 1; 3691 if (blkvar) { 3692 PetscCall(PetscFree(mumps->id.blkvar)); 3693 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3694 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3695 } 3696 } else { 3697 PetscCall(PetscFree(mumps->id.blkptr)); 3698 PetscCall(PetscFree(mumps->id.blkvar)); 3699 // mumps->id.ICNTL(15) = 0; 3700 mumps->id.nblk = 0; 3701 } 3702 PetscFunctionReturn(PETSC_SUCCESS); 3703 } 3704 3705 /*@ 3706 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3707 3708 Not collective, only relevant on the first process of the MPI communicator 3709 3710 Input Parameters: 3711 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3712 . nblk - the number of blocks 3713 . 
blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3714 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3715 3716 Level: advanced 3717 3718 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3719 @*/ 3720 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3721 { 3722 PetscFunctionBegin; 3723 PetscValidType(F, 1); 3724 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3725 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3726 PetscFunctionReturn(PETSC_SUCCESS); 3727 } 3728 3729 /*@ 3730 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3731 3732 Logically Collective 3733 3734 Input Parameters: 3735 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3736 - icntl - index of MUMPS parameter array INFO() 3737 3738 Output Parameter: 3739 . ival - value of MUMPS INFO(icntl) 3740 3741 Level: beginner 3742 3743 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3744 @*/ 3745 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3746 { 3747 PetscFunctionBegin; 3748 PetscValidType(F, 1); 3749 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3750 PetscAssertPointer(ival, 3); 3751 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3752 PetscFunctionReturn(PETSC_SUCCESS); 3753 } 3754 3755 /*@ 3756 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3757 3758 Logically Collective 3759 3760 Input Parameters: 3761 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3762 - icntl - index of MUMPS parameter array INFOG() 3763 3764 Output Parameter: 3765 . 
ival - value of MUMPS INFOG(icntl) 3766 3767 Level: beginner 3768 3769 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3770 @*/ 3771 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3772 { 3773 PetscFunctionBegin; 3774 PetscValidType(F, 1); 3775 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3776 PetscAssertPointer(ival, 3); 3777 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3778 PetscFunctionReturn(PETSC_SUCCESS); 3779 } 3780 3781 /*@ 3782 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3783 3784 Logically Collective 3785 3786 Input Parameters: 3787 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3788 - icntl - index of MUMPS parameter array RINFO() 3789 3790 Output Parameter: 3791 . val - value of MUMPS RINFO(icntl) 3792 3793 Level: beginner 3794 3795 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3796 @*/ 3797 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3798 { 3799 PetscFunctionBegin; 3800 PetscValidType(F, 1); 3801 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3802 PetscAssertPointer(val, 3); 3803 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3804 PetscFunctionReturn(PETSC_SUCCESS); 3805 } 3806 3807 /*@ 3808 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3809 3810 Logically Collective 3811 3812 Input Parameters: 3813 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3814 - icntl - index of MUMPS parameter array RINFOG() 3815 3816 Output Parameter: 3817 . val - value of MUMPS RINFOG(icntl) 3818 3819 Level: beginner 3820 3821 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3822 @*/ 3823 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3824 { 3825 PetscFunctionBegin; 3826 PetscValidType(F, 1); 3827 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3828 PetscAssertPointer(val, 3); 3829 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3830 PetscFunctionReturn(PETSC_SUCCESS); 3831 } 3832 3833 /*@ 3834 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3835 3836 Logically Collective 3837 3838 Input Parameter: 3839 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3840 3841 Output Parameters: 3842 + size - local size of the array. 
The size of the array is non-zero only on MPI rank 0 3843 - array - array of rows with null pivots; these rows use 0-based indexing. The array is allocated within the function and the user is responsible 3844 for freeing it. 3845 3846 Level: beginner 3847 3848 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3849 @*/ 3850 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3851 { 3852 PetscFunctionBegin; 3853 PetscValidType(F, 1); 3854 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3855 PetscAssertPointer(size, 2); 3856 PetscAssertPointer(array, 3); 3857 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3858 PetscFunctionReturn(PETSC_SUCCESS); 3859 } 3860 3861 /*MC 3862 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3863 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3864 3865 Works with `MATAIJ` and `MATSBAIJ` matrices 3866 3867 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3868 3869 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode. 3870 See details below. 3871 3872 Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver 3873 3874 Options Database Keys: 3875 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3876 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3877 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3878 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3879 . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7) 3880 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3881 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3882 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3883 . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinements 3884 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3885 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3886 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3887 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3888 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3889 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3890 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3891 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3892 . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor 3893 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3894 .
-mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3895 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3896 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3897 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3898 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3899 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3900 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3901 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3902 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3903 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3904 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3905 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3906 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3907 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3908 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion for iterative refinement 3909 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3910 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3911 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3912 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3913 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3914 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3915 3916 Level: beginner 3917 3918 Notes: 3919 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so using it will 3920 error if the matrix is Hermitian. 3921 3922 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3923 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3924 3925 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3926 the failure with 3927 .vb 3928 KSPGetPC(ksp,&pc); 3929 PCFactorGetMatrix(pc,&mat); 3930 MatMumpsGetInfo(mat,....); 3931 MatMumpsGetInfog(mat,....); etc. 3932 .ve 3933 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3934 3935 MUMPS provides 64-bit integer support in two build modes: 3936 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3937 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3938 3939 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3940 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3941 columns.
This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3942 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3943 3944 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3945 3946 There are two modes to run MUMPS/PETSc with OpenMP: 3947 .vb 3948 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3949 threads per rank, then you may use "export `OMP_NUM_THREADS`=16 && mpirun -n 4 ./test". 3950 .ve 3951 3952 .vb 3953 Use `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example, 3954 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3955 .ve 3956 3957 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3958 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3959 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3960 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3961 (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided). 3962 3963 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3964 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3965 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3966 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might be set 3967 by CPU binding policies in job scripts) and make them available to the master so that the OMP threads spawned by MUMPS can run on those CPUs. 3968 In a multi-socket compute node, MPI rank mapping is an issue. Continuing the example above, suppose your compute node has two sockets. 3969 If you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3970 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the 3971 cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3972 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3973 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3974 examine the mapping result. 3975 3976 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3977 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread.
One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3978 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3979 3980 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3981 3982 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3983 M*/ 3984 3985 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3986 { 3987 PetscFunctionBegin; 3988 *type = MATSOLVERMUMPS; 3989 PetscFunctionReturn(PETSC_SUCCESS); 3990 } 3991 3992 /* MatGetFactor for Seq and MPI AIJ matrices */ 3993 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3994 { 3995 Mat B; 3996 Mat_MUMPS *mumps; 3997 PetscBool isSeqAIJ, isDiag, isDense; 3998 PetscMPIInt size; 3999 4000 PetscFunctionBegin; 4001 #if defined(PETSC_USE_COMPLEX) 4002 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4003 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4004 *F = NULL; 4005 PetscFunctionReturn(PETSC_SUCCESS); 4006 } 4007 #endif 4008 /* Create the factorization matrix */ 4009 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4010 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 4011 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4012 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4013 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4014 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4015 PetscCall(MatSetUp(B)); 4016 4017 PetscCall(PetscNew(&mumps)); 4018 4019 B->ops->view = MatView_MUMPS; 4020 B->ops->getinfo = MatGetInfo_MUMPS; 4021 4022 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4023 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4024 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4025 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4027 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4028 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4029 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4030 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4031 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4032 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4033 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4034 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4035 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4036 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4037 4038 if (ftype == MAT_FACTOR_LU) { 4039 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4040 B->factortype = MAT_FACTOR_LU; 4041 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4042 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4043 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4044 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 4045 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4046 mumps->sym = 0; 4047 } else { 4048 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4049 B->factortype = MAT_FACTOR_CHOLESKY; 4050 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4051 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4052 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4053 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 4054 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4055 #if defined(PETSC_USE_COMPLEX) 4056 mumps->sym = 2; 4057 #else 4058 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4059 else mumps->sym = 2; 4060 #endif 4061 } 4062 4063 /* set solvertype */ 4064 PetscCall(PetscFree(B->solvertype)); 4065 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4066 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4067 if (size == 1) { 4068 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4069 B->canuseordering = PETSC_TRUE; 4070 } 4071 B->ops->destroy = MatDestroy_MUMPS; 4072 B->data = (void *)mumps; 4073 4074 *F = B; 4075 mumps->id.job = JOB_NULL; 4076 mumps->ICNTL_pre = NULL; 4077 mumps->CNTL_pre = NULL; 4078 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4079 PetscFunctionReturn(PETSC_SUCCESS); 4080 } 4081 4082 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4083 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4084 { 4085 Mat B; 4086 Mat_MUMPS *mumps; 4087 PetscBool isSeqSBAIJ; 4088 PetscMPIInt size; 4089 4090 PetscFunctionBegin; 4091 #if defined(PETSC_USE_COMPLEX) 4092 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4093 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 4094 *F = NULL; 4095 PetscFunctionReturn(PETSC_SUCCESS); 4096 } 4097 #endif 4098 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4099 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4100 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4101 PetscCall(MatSetUp(B)); 4102 4103 PetscCall(PetscNew(&mumps)); 4104 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4105 if (isSeqSBAIJ) { 4106 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4107 } else { 4108 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4109 } 4110 4111 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4112 B->ops->view = MatView_MUMPS; 4113 B->ops->getinfo = MatGetInfo_MUMPS; 4114 4115 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4116 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4117 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4118 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4119 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4120 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4121 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4122 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4123 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4124 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4125 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4126 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4128 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4129 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4130 4131 B->factortype = MAT_FACTOR_CHOLESKY; 4132 #if defined(PETSC_USE_COMPLEX) 4133 mumps->sym = 2; 4134 #else 4135 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4136 else mumps->sym = 2; 4137 #endif 4138 4139 /* set solvertype */ 4140 PetscCall(PetscFree(B->solvertype)); 4141 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4142 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4143 if (size == 1) { 4144 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4145 B->canuseordering = PETSC_TRUE; 4146 } 4147 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4148 B->ops->destroy = MatDestroy_MUMPS; 4149 B->data = (void *)mumps; 4150 4151 *F = B; 4152 mumps->id.job = JOB_NULL; 4153 mumps->ICNTL_pre = NULL; 4154 mumps->CNTL_pre = NULL; 4155 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4156 PetscFunctionReturn(PETSC_SUCCESS); 4157 } 4158 4159 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType 
ftype, Mat *F) 4160 { 4161 Mat B; 4162 Mat_MUMPS *mumps; 4163 PetscBool isSeqBAIJ; 4164 PetscMPIInt size; 4165 4166 PetscFunctionBegin; 4167 /* Create the factorization matrix */ 4168 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 4169 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4170 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4171 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4172 PetscCall(MatSetUp(B)); 4173 4174 PetscCall(PetscNew(&mumps)); 4175 PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4176 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4177 B->factortype = MAT_FACTOR_LU; 4178 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4179 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4180 mumps->sym = 0; 4181 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4182 4183 B->ops->view = MatView_MUMPS; 4184 B->ops->getinfo = MatGetInfo_MUMPS; 4185 4186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4187 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4201 4202 /* set solvertype */ 4203 PetscCall(PetscFree(B->solvertype)); 4204 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4205 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4206 if (size == 1) { 4207 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4208 B->canuseordering = PETSC_TRUE; 4209 } 4210 B->ops->destroy = MatDestroy_MUMPS; 4211 B->data = (void *)mumps; 4212 4213 *F = B; 4214 mumps->id.job = JOB_NULL; 4215 mumps->ICNTL_pre = NULL; 4216 mumps->CNTL_pre = NULL; 4217 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4218 PetscFunctionReturn(PETSC_SUCCESS); 4219 } 4220 4221 /* MatGetFactor for Seq and MPI 
SELL matrices */ 4222 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4223 { 4224 Mat B; 4225 Mat_MUMPS *mumps; 4226 PetscBool isSeqSELL; 4227 PetscMPIInt size; 4228 4229 PetscFunctionBegin; 4230 /* Create the factorization matrix */ 4231 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 4232 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4233 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4234 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4235 PetscCall(MatSetUp(B)); 4236 4237 PetscCall(PetscNew(&mumps)); 4238 4239 B->ops->view = MatView_MUMPS; 4240 B->ops->getinfo = MatGetInfo_MUMPS; 4241 4242 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4243 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4244 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4245 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4246 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4247 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4248 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4250 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4251 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4252 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4253 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4254 4255 PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4256 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4257 B->factortype = MAT_FACTOR_LU; 4258 PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4259 mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 4260 mumps->sym = 0; 4261 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4262 4263 /* set solvertype */ 4264 PetscCall(PetscFree(B->solvertype)); 4265 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4266 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4267 if (size == 1) { 4268 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4269 B->canuseordering = PETSC_TRUE; 4270 } 4271 B->ops->destroy = MatDestroy_MUMPS; 4272 B->data = (void *)mumps; 4273 4274 *F = B; 4275 mumps->id.job = JOB_NULL; 4276 mumps->ICNTL_pre = NULL; 4277 mumps->CNTL_pre = NULL; 4278 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4279 PetscFunctionReturn(PETSC_SUCCESS); 4280 } 4281 4282 /* MatGetFactor for MATNEST matrices */ 4283 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 4284 { 4285 Mat B, **mats; 4286 Mat_MUMPS *mumps; 4287 PetscInt nr, nc; 4288 PetscMPIInt size; 4289 PetscBool flg = PETSC_TRUE; 4290 4291 PetscFunctionBegin; 4292 #if 
defined(PETSC_USE_COMPLEX) 4293 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4294 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4295 *F = NULL; 4296 PetscFunctionReturn(PETSC_SUCCESS); 4297 } 4298 #endif 4299 4300 /* Return if some condition is not satisfied */ 4301 *F = NULL; 4302 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 4303 if (ftype == MAT_FACTOR_CHOLESKY) { 4304 IS *rows, *cols; 4305 PetscInt *m, *M; 4306 4307 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 4308 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 4309 PetscCall(MatNestGetISs(A, rows, cols)); 4310 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 4311 if (!flg) { 4312 PetscCall(PetscFree2(rows, cols)); 4313 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 4314 PetscFunctionReturn(PETSC_SUCCESS); 4315 } 4316 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 4317 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 4318 for (PetscInt r = 0; flg && r < nr; r++) 4319 for (PetscInt k = r + 1; flg && k < nr; k++) 4320 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 4321 PetscCall(PetscFree2(m, M)); 4322 PetscCall(PetscFree2(rows, cols)); 4323 if (!flg) { 4324 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 4325 PetscFunctionReturn(PETSC_SUCCESS); 4326 } 4327 } 4328 4329 for (PetscInt r = 0; r < nr; r++) { 4330 for (PetscInt c = 0; c < nc; c++) { 4331 Mat sub = mats[r][c]; 4332 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 4333 4334 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 4335 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 4336 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 4337 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 4338 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 4339 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 4340 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 4341 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4342 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 4343 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4344 if (ftype == MAT_FACTOR_CHOLESKY) { 4345 if (r == c) { 4346 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 4347 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4348 flg = PETSC_FALSE; 4349 } 4350 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4351 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4352 flg = PETSC_FALSE; 4353 } 4354 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4355 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not 
supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 4356 flg = PETSC_FALSE; 4357 } 4358 } 4359 } 4360 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 4361 4362 /* Create the factorization matrix */ 4363 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4364 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4365 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4366 PetscCall(MatSetUp(B)); 4367 4368 PetscCall(PetscNew(&mumps)); 4369 4370 B->ops->view = MatView_MUMPS; 4371 B->ops->getinfo = MatGetInfo_MUMPS; 4372 4373 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4374 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4375 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4376 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4377 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4378 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4379 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4380 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4388 4389 if (ftype == MAT_FACTOR_LU) { 4390 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4391 B->factortype = MAT_FACTOR_LU; 4392 mumps->sym = 0; 4393 } else { 4394 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4395 B->factortype = MAT_FACTOR_CHOLESKY; 4396 #if defined(PETSC_USE_COMPLEX) 4397 mumps->sym = 2; 4398 #else 4399 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4400 else mumps->sym = 2; 4401 #endif 4402 } 4403 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 4404 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 4405 4406 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4407 if (size == 1) { 4408 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4409 B->canuseordering = PETSC_TRUE; 4410 } 4411 4412 /* set solvertype */ 4413 PetscCall(PetscFree(B->solvertype)); 4414 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4415 B->ops->destroy = MatDestroy_MUMPS; 4416 B->data = (void *)mumps; 4417 4418 *F = B; 4419 mumps->id.job = JOB_NULL; 4420 mumps->ICNTL_pre = NULL; 4421 mumps->CNTL_pre = NULL; 4422 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4423 PetscFunctionReturn(PETSC_SUCCESS); 4424 } 4425 4426 
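/*
   Orientation note (not part of the interface): the MatGetFactor_* constructors above are what MatGetFactor()
   dispatches to once MatSolverTypeRegister_MUMPS() below has registered them. The following is a minimal,
   hedged sketch of that direct (non-KSP) path, assuming a sequential MATSEQAIJ matrix A and vectors b, x that
   were created elsewhere; the ICNTL value shown is illustrative only. It uses only public PETSc calls
   (MatGetOrdering(), MatGetFactor(), MatMumpsSetIcntl(), MatLUFactorSymbolic()/MatLUFactorNumeric(),
   MatSolve(), MatMumpsGetInfog()).

.vb
  Mat           F;
  IS            rowperm, colperm;
  MatFactorInfo info;
  PetscInt      infog1;

  PetscCall(MatFactorInfoInitialize(&info));
  PetscCall(MatGetOrdering(A, MATORDERINGND, &rowperm, &colperm)); // MUMPS can also compute its own ordering, see ICNTL(7)/ICNTL(28)
  PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
  PetscCall(MatMumpsSetIcntl(F, 14, 30));                          // e.g. raise ICNTL(14), the working-space percentage increase
  PetscCall(MatLUFactorSymbolic(F, A, rowperm, colperm, &info));
  PetscCall(MatLUFactorNumeric(F, A, &info));
  PetscCall(MatSolve(F, b, x));
  PetscCall(MatMumpsGetInfog(F, 1, &infog1));                      // INFOG(1) < 0 indicates a MUMPS error
  PetscCall(ISDestroy(&rowperm));
  PetscCall(ISDestroy(&colperm));
  PetscCall(MatDestroy(&F));
.ve
*/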
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4427 { 4428 PetscFunctionBegin; 4429 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4430 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4431 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4432 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4433 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4434 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4435 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4436 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4437 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4438 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4439 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4440 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4441 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4442 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4443 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4444 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4445 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4446 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 4447 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 4448 PetscFunctionReturn(PETSC_SUCCESS); 4449 } 4450
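/*
   For completeness, a hedged sketch of the KSP/PC route described in the MATSOLVERMUMPS notes above: select MUMPS
   through the options database and, if the factorization fails, retrieve the MUMPS diagnostics through
   PCFactorGetMatrix() and MatMumpsGetInfog(). It assumes a matrix A and vectors b, x created elsewhere; the
   command-line options in the comment are illustrative only.

.vb
  KSP                ksp;
  PC                 pc;
  Mat                F;
  PetscInt           infog1, infog2;
  KSPConvergedReason reason;

  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, A));
  PetscCall(KSPSetFromOptions(ksp)); // e.g. -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_24 1
  PetscCall(KSPSolve(ksp, b, x));
  PetscCall(KSPGetConvergedReason(ksp, &reason));
  if (reason == KSP_DIVERGED_PC_FAILED) { // the factorization failed; ask MUMPS why
    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCFactorGetMatrix(pc, &F));
    PetscCall(MatMumpsGetInfog(F, 1, &infog1));
    PetscCall(MatMumpsGetInfog(F, 2, &infog2));
  }
  PetscCall(KSPDestroy(&ksp));
.ve
*/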