1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <petscsf.h> 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 9 #include <../src/mat/impls/sell/mpi/mpisell.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #if defined(PETSC_USE_REAL_SINGLE) 16 #include <cmumps_c.h> 17 #else 18 #include <zmumps_c.h> 19 #endif 20 #else 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <smumps_c.h> 23 #else 24 #include <dmumps_c.h> 25 #endif 26 #endif 27 EXTERN_C_END 28 #define JOB_INIT -1 29 #define JOB_NULL 0 30 #define JOB_FACTSYMBOLIC 1 31 #define JOB_FACTNUMERIC 2 32 #define JOB_SOLVE 3 33 #define JOB_END -2 34 35 /* calls to MUMPS */ 36 #if defined(PETSC_USE_COMPLEX) 37 #if defined(PETSC_USE_REAL_SINGLE) 38 #define MUMPS_c cmumps_c 39 #else 40 #define MUMPS_c zmumps_c 41 #endif 42 #else 43 #if defined(PETSC_USE_REAL_SINGLE) 44 #define MUMPS_c smumps_c 45 #else 46 #define MUMPS_c dmumps_c 47 #endif 48 #endif 49 50 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 51 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 52 naming convention in PetscMPIInt, PetscBLASInt etc. 53 */ 54 typedef MUMPS_INT PetscMUMPSInt; 55 56 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 57 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #else 61 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 62 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 63 #endif 64 #endif 65 66 #define MPIU_MUMPSINT MPI_INT 67 #define PETSC_MUMPS_INT_MAX 2147483647 68 #define PETSC_MUMPS_INT_MIN -2147483648 69 70 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 71 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) 72 { 73 PetscFunctionBegin; 74 #if PetscDefined(USE_64BIT_INDICES) 75 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 76 #endif 77 *b = (PetscMUMPSInt)(a); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* Put these utility routines here since they are only used in this file */ 82 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 83 { 84 PetscInt myval; 85 PetscBool myset; 86 PetscFunctionBegin; 87 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 88 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 89 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 90 if (set) *set = myset; 91 PetscFunctionReturn(PETSC_SUCCESS); 92 } 93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 94 95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 96 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 97 #define PetscMUMPS_c(mumps) \ 98 do { \ 99 if (mumps->use_petsc_omp_support) { \ 100 if (mumps->is_omp_master) { \ 101 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 102 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 103 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 104 PetscCall(PetscFPTrapPop()); \ 105 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 106 } \ 107 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 108 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 109 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 110 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 111 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 112 */ \ 113 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 116 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 117 } else { \ 118 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 119 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 120 PetscCall(PetscFPTrapPop()); \ 121 } \ 122 } while (0) 123 #else 124 #define PetscMUMPS_c(mumps) \ 125 do { \ 126 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 127 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 128 PetscCall(PetscFPTrapPop()); \ 129 } while (0) 130 #endif 131 132 /* declare MumpsScalar */ 133 #if defined(PETSC_USE_COMPLEX) 134 #if defined(PETSC_USE_REAL_SINGLE) 135 #define MumpsScalar mumps_complex 136 #else 137 #define MumpsScalar mumps_double_complex 138 #endif 139 #else 140 #define MumpsScalar PetscScalar 141 #endif 142 143 /* macros s.t. indices match MUMPS documentation */ 144 #define ICNTL(I) icntl[(I)-1] 145 #define CNTL(I) cntl[(I)-1] 146 #define INFOG(I) infog[(I)-1] 147 #define INFO(I) info[(I)-1] 148 #define RINFOG(I) rinfog[(I)-1] 149 #define RINFO(I) rinfo[(I)-1] 150 151 typedef struct Mat_MUMPS Mat_MUMPS; 152 struct Mat_MUMPS { 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_REAL_SINGLE) 155 CMUMPS_STRUC_C id; 156 #else 157 ZMUMPS_STRUC_C id; 158 #endif 159 #else 160 #if defined(PETSC_USE_REAL_SINGLE) 161 SMUMPS_STRUC_C id; 162 #else 163 DMUMPS_STRUC_C id; 164 #endif 165 #endif 166 167 MatStructure matstruc; 168 PetscMPIInt myid, petsc_size; 169 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 170 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 171 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 172 PetscMUMPSInt sym; 173 MPI_Comm mumps_comm; 174 PetscMUMPSInt *ICNTL_pre; 175 PetscReal *CNTL_pre; 176 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 177 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 178 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 179 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 180 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 181 PetscInt *rhs_nrow, max_nrhs; 182 PetscMPIInt *rhs_recvcounts, *rhs_disps; 183 PetscScalar *rhs_loc, *rhs_recvbuf; 184 #endif 185 Vec b_seq, x_seq; 186 PetscInt ninfo, *info; /* which INFO to display */ 187 PetscInt sizeredrhs; 188 PetscScalar *schur_sol; 189 PetscInt schur_sizesol; 190 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 191 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 192 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 193 194 /* Support for MATNEST */ 195 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 196 PetscInt64 *nest_vals_start; 197 PetscScalar *nest_vals; 198 199 /* stuff used by petsc/mumps OpenMP support*/ 200 PetscBool use_petsc_omp_support; 201 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 202 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 203 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 204 PetscMPIInt tag, omp_comm_size; 205 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 206 MPI_Request *reqs; 207 }; 208 209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 210 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 211 */ 212 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 213 { 214 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 215 216 PetscFunctionBegin; 217 #if defined(PETSC_USE_64BIT_INDICES) 218 { 219 PetscInt i; 220 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 221 PetscCall(PetscFree(mumps->ia_alloc)); 222 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 223 mumps->cur_ilen = nrow + 1; 224 } 225 if (nnz > mumps->cur_jlen) { 226 PetscCall(PetscFree(mumps->ja_alloc)); 227 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 228 mumps->cur_jlen = nnz; 229 } 230 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 231 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 232 *ia_mumps = mumps->ia_alloc; 233 *ja_mumps = mumps->ja_alloc; 234 } 235 #else 236 *ia_mumps = ia; 237 *ja_mumps = ja; 238 #endif 239 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 240 PetscFunctionReturn(PETSC_SUCCESS); 241 } 242 243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 244 { 245 PetscFunctionBegin; 246 PetscCall(PetscFree(mumps->id.listvar_schur)); 247 PetscCall(PetscFree(mumps->id.redrhs)); 248 PetscCall(PetscFree(mumps->schur_sol)); 249 mumps->id.size_schur = 0; 250 mumps->id.schur_lld = 0; 251 mumps->id.ICNTL(19) = 0; 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 /* solve with rhs in mumps->id.redrhs and return in the same location */ 256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 257 { 258 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 259 Mat S, B, X; 260 MatFactorSchurStatus schurstatus; 261 PetscInt sizesol; 262 263 PetscFunctionBegin; 264 PetscCall(MatFactorFactorizeSchurComplement(F)); 265 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 267 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 269 PetscCall(MatBindToCPU(B, S->boundtocpu)); 270 #endif 271 switch (schurstatus) { 272 case MAT_FACTOR_SCHUR_FACTORED: 273 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 274 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 276 PetscCall(MatBindToCPU(X, S->boundtocpu)); 277 #endif 278 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 279 PetscCall(MatMatSolveTranspose(S, B, X)); 280 } else { 281 PetscCall(MatMatSolve(S, B, X)); 282 } 283 break; 284 case MAT_FACTOR_SCHUR_INVERTED: 285 sizesol = mumps->id.nrhs * mumps->id.size_schur; 286 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 287 PetscCall(PetscFree(mumps->schur_sol)); 288 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 289 mumps->schur_sizesol = sizesol; 290 } 291 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 292 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 294 PetscCall(MatBindToCPU(X, S->boundtocpu)); 295 #endif 296 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 297 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 298 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 299 } else { 300 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 301 } 302 PetscCall(MatProductSetFromOptions(X)); 303 PetscCall(MatProductSymbolic(X)); 304 PetscCall(MatProductNumeric(X)); 305 306 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 307 break; 308 default: 309 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 310 } 311 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 312 PetscCall(MatDestroy(&B)); 313 PetscCall(MatDestroy(&X)); 314 PetscFunctionReturn(PETSC_SUCCESS); 315 } 316 317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 318 { 319 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 320 321 PetscFunctionBegin; 322 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 if (!expansion) { /* prepare for the condensation step */ 326 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 327 /* allocate MUMPS internal array to store reduced right-hand sides */ 328 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 329 PetscCall(PetscFree(mumps->id.redrhs)); 330 mumps->id.lredrhs = mumps->id.size_schur; 331 PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs)); 332 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 333 } 334 mumps->id.ICNTL(26) = 1; /* condensation phase */ 335 } else { /* prepare for the expansion step */ 336 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 337 PetscCall(MatMumpsSolveSchur_Private(F)); 338 mumps->id.ICNTL(26) = 2; /* expansion phase */ 339 PetscMUMPS_c(mumps); 340 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 341 /* restore defaults */ 342 mumps->id.ICNTL(26) = -1; 343 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 344 if (mumps->id.nrhs > 1) { 345 PetscCall(PetscFree(mumps->id.redrhs)); 346 mumps->id.lredrhs = 0; 347 mumps->sizeredrhs = 0; 348 } 349 } 350 PetscFunctionReturn(PETSC_SUCCESS); 351 } 352 353 /* 354 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 355 356 input: 357 A - matrix in aij,baij or sbaij format 358 shift - 0: C style output triple; 1: Fortran style output triple. 359 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 360 MAT_REUSE_MATRIX: only the values in v array are updated 361 output: 362 nnz - dim of r, c, and v (number of local nonzero entries of A) 363 r, c, v - row and col index, matrix values (matrix triples) 364 365 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 366 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 367 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 368 369 */ 370 371 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 372 { 373 const PetscScalar *av; 374 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 375 PetscInt64 nz, rnz, i, j, k; 376 PetscMUMPSInt *row, *col; 377 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 378 379 PetscFunctionBegin; 380 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 381 mumps->val = (PetscScalar *)av; 382 if (reuse == MAT_INITIAL_MATRIX) { 383 nz = aa->nz; 384 ai = aa->i; 385 aj = aa->j; 386 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 387 for (i = k = 0; i < M; i++) { 388 rnz = ai[i + 1] - ai[i]; 389 ajj = aj + ai[i]; 390 for (j = 0; j < rnz; j++) { 391 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 392 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 393 k++; 394 } 395 } 396 mumps->irn = row; 397 mumps->jcn = col; 398 mumps->nnz = nz; 399 } 400 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 401 PetscFunctionReturn(PETSC_SUCCESS); 402 } 403 404 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 405 { 406 PetscInt64 nz, i, j, k, r; 407 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 408 PetscMUMPSInt *row, *col; 409 410 PetscFunctionBegin; 411 mumps->val = a->val; 412 if (reuse == MAT_INITIAL_MATRIX) { 413 nz = a->sliidx[a->totalslices]; 414 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 415 for (i = k = 0; i < a->totalslices; i++) { 416 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 417 } 418 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 419 mumps->irn = row; 420 mumps->jcn = col; 421 mumps->nnz = nz; 422 } 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 427 { 428 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 429 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 430 PetscInt64 M, nz, idx = 0, rnz, i, j, k, m; 431 PetscInt bs; 432 PetscMUMPSInt *row, *col; 433 434 PetscFunctionBegin; 435 PetscCall(MatGetBlockSize(A, &bs)); 436 M = A->rmap->N / bs; 437 mumps->val = aa->a; 438 if (reuse == MAT_INITIAL_MATRIX) { 439 ai = aa->i; 440 aj = aa->j; 441 nz = bs2 * aa->nz; 442 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 443 for (i = 0; i < M; i++) { 444 ajj = aj + ai[i]; 445 rnz = ai[i + 1] - ai[i]; 446 for (k = 0; k < rnz; k++) { 447 for (j = 0; j < bs; j++) { 448 for (m = 0; m < bs; m++) { 449 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 450 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 451 idx++; 452 } 453 } 454 } 455 } 456 mumps->irn = row; 457 mumps->jcn = col; 458 mumps->nnz = nz; 459 } 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 464 { 465 const PetscInt *ai, *aj, *ajj; 466 PetscInt bs; 467 PetscInt64 nz, rnz, i, j, k, m; 468 PetscMUMPSInt *row, *col; 469 PetscScalar *val; 470 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 471 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 472 #if defined(PETSC_USE_COMPLEX) 473 PetscBool isset, hermitian; 474 #endif 475 476 PetscFunctionBegin; 477 #if defined(PETSC_USE_COMPLEX) 478 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 479 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 480 #endif 481 ai = aa->i; 482 aj = aa->j; 483 PetscCall(MatGetBlockSize(A, &bs)); 484 if (reuse == MAT_INITIAL_MATRIX) { 485 const PetscInt64 alloc_size = aa->nz * bs2; 486 487 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 488 if (bs > 1) { 489 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 490 mumps->val = mumps->val_alloc; 491 } else { 492 mumps->val = aa->a; 493 } 494 mumps->irn = row; 495 mumps->jcn = col; 496 } else { 497 if (bs == 1) mumps->val = aa->a; 498 row = mumps->irn; 499 col = mumps->jcn; 500 } 501 val = mumps->val; 502 503 nz = 0; 504 if (bs > 1) { 505 for (i = 0; i < mbs; i++) { 506 rnz = ai[i + 1] - ai[i]; 507 ajj = aj + ai[i]; 508 for (j = 0; j < rnz; j++) { 509 for (k = 0; k < bs; k++) { 510 for (m = 0; m < bs; m++) { 511 if (ajj[j] > i || k >= m) { 512 if (reuse == MAT_INITIAL_MATRIX) { 513 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 514 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 515 } 516 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 517 } 518 } 519 } 520 } 521 } 522 } else if (reuse == MAT_INITIAL_MATRIX) { 523 for (i = 0; i < mbs; i++) { 524 rnz = ai[i + 1] - ai[i]; 525 ajj = aj + ai[i]; 526 for (j = 0; j < rnz; j++) { 527 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 528 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 529 nz++; 530 } 531 } 532 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 533 } 534 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 535 PetscFunctionReturn(PETSC_SUCCESS); 536 } 537 538 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 539 { 540 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 541 PetscInt64 nz, rnz, i, j; 542 const PetscScalar *av, *v1; 543 PetscScalar *val; 544 PetscMUMPSInt *row, *col; 545 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 546 PetscBool missing; 547 #if defined(PETSC_USE_COMPLEX) 548 PetscBool hermitian, isset; 549 #endif 550 551 PetscFunctionBegin; 552 #if defined(PETSC_USE_COMPLEX) 553 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 554 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 555 #endif 556 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 557 ai = aa->i; 558 aj = aa->j; 559 adiag = aa->diag; 560 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 561 if (reuse == MAT_INITIAL_MATRIX) { 562 /* count nz in the upper triangular part of A */ 563 nz = 0; 564 if (missing) { 565 for (i = 0; i < M; i++) { 566 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 567 for (j = ai[i]; j < ai[i + 1]; j++) { 568 if (aj[j] < i) continue; 569 nz++; 570 } 571 } else { 572 nz += ai[i + 1] - adiag[i]; 573 } 574 } 575 } else { 576 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 577 } 578 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 579 PetscCall(PetscMalloc1(nz, &val)); 580 mumps->nnz = nz; 581 mumps->irn = row; 582 mumps->jcn = col; 583 mumps->val = mumps->val_alloc = val; 584 585 nz = 0; 586 if (missing) { 587 for (i = 0; i < M; i++) { 588 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 589 for (j = ai[i]; j < ai[i + 1]; j++) { 590 if (aj[j] < i) continue; 591 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 592 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 593 val[nz] = av[j]; 594 nz++; 595 } 596 } else { 597 rnz = ai[i + 1] - adiag[i]; 598 ajj = aj + adiag[i]; 599 v1 = av + adiag[i]; 600 for (j = 0; j < rnz; j++) { 601 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 602 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 603 val[nz++] = v1[j]; 604 } 605 } 606 } 607 } else { 608 for (i = 0; i < M; i++) { 609 rnz = ai[i + 1] - adiag[i]; 610 ajj = aj + adiag[i]; 611 v1 = av + adiag[i]; 612 for (j = 0; j < rnz; j++) { 613 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 614 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 615 val[nz++] = v1[j]; 616 } 617 } 618 } 619 } else { 620 nz = 0; 621 val = mumps->val; 622 if (missing) { 623 for (i = 0; i < M; i++) { 624 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 625 for (j = ai[i]; j < ai[i + 1]; j++) { 626 if (aj[j] < i) continue; 627 val[nz++] = av[j]; 628 } 629 } else { 630 rnz = ai[i + 1] - adiag[i]; 631 v1 = av + adiag[i]; 632 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 633 } 634 } 635 } else { 636 for (i = 0; i < M; i++) { 637 rnz = ai[i + 1] - adiag[i]; 638 v1 = av + adiag[i]; 639 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 640 } 641 } 642 } 643 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 644 PetscFunctionReturn(PETSC_SUCCESS); 645 } 646 647 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 648 { 649 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 650 PetscInt bs; 651 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 652 PetscMUMPSInt *row, *col; 653 const PetscScalar *av, *bv, *v1, *v2; 654 PetscScalar *val; 655 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 656 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 657 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 658 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 659 #if defined(PETSC_USE_COMPLEX) 660 PetscBool hermitian, isset; 661 #endif 662 663 PetscFunctionBegin; 664 #if defined(PETSC_USE_COMPLEX) 665 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 666 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 667 #endif 668 PetscCall(MatGetBlockSize(A, &bs)); 669 rstart = A->rmap->rstart; 670 ai = aa->i; 671 aj = aa->j; 672 bi = bb->i; 673 bj = bb->j; 674 av = aa->a; 675 bv = bb->a; 676 677 garray = mat->garray; 678 679 if (reuse == MAT_INITIAL_MATRIX) { 680 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 681 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 682 PetscCall(PetscMalloc1(nz, &val)); 683 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 684 mumps->irn = row; 685 mumps->jcn = col; 686 mumps->val = mumps->val_alloc = val; 687 } else { 688 val = mumps->val; 689 } 690 691 jj = 0; 692 irow = rstart; 693 for (i = 0; i < mbs; i++) { 694 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 695 countA = ai[i + 1] - ai[i]; 696 countB = bi[i + 1] - bi[i]; 697 bjj = bj + bi[i]; 698 v1 = av + ai[i] * bs2; 699 v2 = bv + bi[i] * bs2; 700 701 if (bs > 1) { 702 /* A-part */ 703 for (j = 0; j < countA; j++) { 704 for (k = 0; k < bs; k++) { 705 for (m = 0; m < bs; m++) { 706 if (rstart + ajj[j] * bs > irow || k >= m) { 707 if (reuse == MAT_INITIAL_MATRIX) { 708 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 709 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 710 } 711 val[jj++] = v1[j * bs2 + m + k * bs]; 712 } 713 } 714 } 715 } 716 717 /* B-part */ 718 for (j = 0; j < countB; j++) { 719 for (k = 0; k < bs; k++) { 720 for (m = 0; m < bs; m++) { 721 if (reuse == MAT_INITIAL_MATRIX) { 722 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 723 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 724 } 725 val[jj++] = v2[j * bs2 + m + k * bs]; 726 } 727 } 728 } 729 } else { 730 /* A-part */ 731 for (j = 0; j < countA; j++) { 732 if (reuse == MAT_INITIAL_MATRIX) { 733 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 734 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 735 } 736 val[jj++] = v1[j]; 737 } 738 739 /* B-part */ 740 for (j = 0; j < countB; j++) { 741 if (reuse == MAT_INITIAL_MATRIX) { 742 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 743 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 744 } 745 val[jj++] = v2[j]; 746 } 747 } 748 irow += bs; 749 } 750 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 755 { 756 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 757 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 758 PetscMUMPSInt *row, *col; 759 const PetscScalar *av, *bv, *v1, *v2; 760 PetscScalar *val; 761 Mat Ad, Ao; 762 Mat_SeqAIJ *aa; 763 Mat_SeqAIJ *bb; 764 765 PetscFunctionBegin; 766 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 767 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 768 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 769 770 aa = (Mat_SeqAIJ *)(Ad)->data; 771 bb = (Mat_SeqAIJ *)(Ao)->data; 772 ai = aa->i; 773 aj = aa->j; 774 bi = bb->i; 775 bj = bb->j; 776 777 rstart = A->rmap->rstart; 778 cstart = A->cmap->rstart; 779 780 if (reuse == MAT_INITIAL_MATRIX) { 781 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 782 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 783 PetscCall(PetscMalloc1(nz, &val)); 784 mumps->nnz = nz; 785 mumps->irn = row; 786 mumps->jcn = col; 787 mumps->val = mumps->val_alloc = val; 788 } else { 789 val = mumps->val; 790 } 791 792 jj = 0; 793 irow = rstart; 794 for (i = 0; i < m; i++) { 795 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 796 countA = ai[i + 1] - ai[i]; 797 countB = bi[i + 1] - bi[i]; 798 bjj = bj + bi[i]; 799 v1 = av + ai[i]; 800 v2 = bv + bi[i]; 801 802 /* A-part */ 803 for (j = 0; j < countA; j++) { 804 if (reuse == MAT_INITIAL_MATRIX) { 805 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 806 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 807 } 808 val[jj++] = v1[j]; 809 } 810 811 /* B-part */ 812 for (j = 0; j < countB; j++) { 813 if (reuse == MAT_INITIAL_MATRIX) { 814 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 815 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 816 } 817 val[jj++] = v2[j]; 818 } 819 irow++; 820 } 821 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 822 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 823 PetscFunctionReturn(PETSC_SUCCESS); 824 } 825 826 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 827 { 828 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 829 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 830 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 831 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 832 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 833 const PetscInt bs2 = mat->bs2; 834 PetscInt bs; 835 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 836 PetscMUMPSInt *row, *col; 837 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 838 PetscScalar *val; 839 840 PetscFunctionBegin; 841 PetscCall(MatGetBlockSize(A, &bs)); 842 if (reuse == MAT_INITIAL_MATRIX) { 843 nz = bs2 * (aa->nz + bb->nz); 844 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 845 PetscCall(PetscMalloc1(nz, &val)); 846 mumps->nnz = nz; 847 mumps->irn = row; 848 mumps->jcn = col; 849 mumps->val = mumps->val_alloc = val; 850 } else { 851 val = mumps->val; 852 } 853 854 jj = 0; 855 irow = rstart; 856 for (i = 0; i < mbs; i++) { 857 countA = ai[i + 1] - ai[i]; 858 countB = bi[i + 1] - bi[i]; 859 ajj = aj + ai[i]; 860 bjj = bj + bi[i]; 861 v1 = av + bs2 * ai[i]; 862 v2 = bv + bs2 * bi[i]; 863 864 idx = 0; 865 /* A-part */ 866 for (k = 0; k < countA; k++) { 867 for (j = 0; j < bs; j++) { 868 for (n = 0; n < bs; n++) { 869 if (reuse == MAT_INITIAL_MATRIX) { 870 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 871 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 872 } 873 val[jj++] = v1[idx++]; 874 } 875 } 876 } 877 878 idx = 0; 879 /* B-part */ 880 for (k = 0; k < countB; k++) { 881 for (j = 0; j < bs; j++) { 882 for (n = 0; n < bs; n++) { 883 if (reuse == MAT_INITIAL_MATRIX) { 884 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 885 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 886 } 887 val[jj++] = v2[idx++]; 888 } 889 } 890 } 891 irow += bs; 892 } 893 PetscFunctionReturn(PETSC_SUCCESS); 894 } 895 896 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 897 { 898 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 899 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 900 PetscMUMPSInt *row, *col; 901 const PetscScalar *av, *bv, *v1, *v2; 902 PetscScalar *val; 903 Mat Ad, Ao; 904 Mat_SeqAIJ *aa; 905 Mat_SeqAIJ *bb; 906 #if defined(PETSC_USE_COMPLEX) 907 PetscBool hermitian, isset; 908 #endif 909 910 PetscFunctionBegin; 911 #if defined(PETSC_USE_COMPLEX) 912 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 913 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 914 #endif 915 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 916 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 917 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 918 919 aa = (Mat_SeqAIJ *)(Ad)->data; 920 bb = (Mat_SeqAIJ *)(Ao)->data; 921 ai = aa->i; 922 aj = aa->j; 923 adiag = aa->diag; 924 bi = bb->i; 925 bj = bb->j; 926 927 rstart = A->rmap->rstart; 928 929 if (reuse == MAT_INITIAL_MATRIX) { 930 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 931 nzb = 0; /* num of upper triangular entries in mat->B */ 932 for (i = 0; i < m; i++) { 933 nza += (ai[i + 1] - adiag[i]); 934 countB = bi[i + 1] - bi[i]; 935 bjj = bj + bi[i]; 936 for (j = 0; j < countB; j++) { 937 if (garray[bjj[j]] > rstart) nzb++; 938 } 939 } 940 941 nz = nza + nzb; /* total nz of upper triangular part of mat */ 942 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 943 PetscCall(PetscMalloc1(nz, &val)); 944 mumps->nnz = nz; 945 mumps->irn = row; 946 mumps->jcn = col; 947 mumps->val = mumps->val_alloc = val; 948 } else { 949 val = mumps->val; 950 } 951 952 jj = 0; 953 irow = rstart; 954 for (i = 0; i < m; i++) { 955 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 956 v1 = av + adiag[i]; 957 countA = ai[i + 1] - adiag[i]; 958 countB = bi[i + 1] - bi[i]; 959 bjj = bj + bi[i]; 960 v2 = bv + bi[i]; 961 962 /* A-part */ 963 for (j = 0; j < countA; j++) { 964 if (reuse == MAT_INITIAL_MATRIX) { 965 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 966 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 967 } 968 val[jj++] = v1[j]; 969 } 970 971 /* B-part */ 972 for (j = 0; j < countB; j++) { 973 if (garray[bjj[j]] > rstart) { 974 if (reuse == MAT_INITIAL_MATRIX) { 975 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 976 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 977 } 978 val[jj++] = v2[j]; 979 } 980 } 981 irow++; 982 } 983 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 984 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 985 PetscFunctionReturn(PETSC_SUCCESS); 986 } 987 988 PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 989 { 990 Mat **mats; 991 PetscInt nr, nc; 992 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 993 994 PetscFunctionBegin; 995 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 996 if (reuse == MAT_INITIAL_MATRIX) { 997 PetscMUMPSInt *irns, *jcns; 998 PetscScalar *vals; 999 PetscInt64 totnnz, cumnnz, maxnnz; 1000 PetscInt *pjcns_w; 1001 IS *rows, *cols; 1002 PetscInt **rows_idx, **cols_idx; 1003 1004 cumnnz = 0; 1005 maxnnz = 0; 1006 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1007 for (PetscInt r = 0; r < nr; r++) { 1008 for (PetscInt c = 0; c < nc; c++) { 1009 Mat sub = mats[r][c]; 1010 1011 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1012 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1013 if (sub) { 1014 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1015 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE; 1016 MatInfo info; 1017 1018 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1019 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1020 else { 1021 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1022 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1023 } 1024 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1025 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1026 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1027 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1028 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1029 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1030 1031 if (chol) { 1032 if (r == c) { 1033 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1034 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1035 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1036 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1037 } else { 1038 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1039 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1040 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1041 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1042 } 1043 } else { 1044 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1045 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1046 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1047 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1048 } 1049 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1050 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1051 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1052 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */ 1053 maxnnz = PetscMax(maxnnz, info.nz_used); 1054 } 1055 } 1056 } 1057 1058 /* Allocate total COO */ 1059 totnnz = cumnnz; 1060 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1061 PetscCall(PetscMalloc1(totnnz, &vals)); 1062 1063 /* Handle rows and column maps 1064 We directly map rows and use an SF for the columns */ 1065 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1066 PetscCall(MatNestGetISs(A, rows, cols)); 1067 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1068 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1069 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1070 else (void)maxnnz; 1071 1072 cumnnz = 0; 1073 for (PetscInt r = 0; r < nr; r++) { 1074 for (PetscInt c = 0; c < nc; c++) { 1075 Mat sub = mats[r][c]; 1076 const PetscInt *ridx = rows_idx[r]; 1077 const PetscInt *cidx = cols_idx[c]; 1078 PetscInt rst; 1079 PetscSF csf; 1080 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1081 PetscLayout cmap; 1082 1083 mumps->nest_vals_start[r * nc + c] = cumnnz; 1084 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1085 1086 /* Extract inner blocks if needed */ 1087 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1088 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1089 else { 1090 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1091 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1092 } 1093 swap = (PetscBool)(isTrans || isHTrans); 1094 1095 /* Get column layout to map off-process columns */ 1096 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1097 1098 /* Get row start to map on-process rows */ 1099 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1100 1101 /* Directly use the mumps datastructure and use C ordering for now */ 1102 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1103 1104 /* Swap the role of rows and columns indices for transposed blocks 1105 since we need values with global final ordering */ 1106 if (swap) { 1107 cidx = rows_idx[r]; 1108 ridx = cols_idx[c]; 1109 } 1110 1111 /* Communicate column indices 1112 This could have been done with a single SF but it would have complicated the code a lot. 1113 But since we do it only once, we pay the price of setting up an SF for each block */ 1114 if (PetscDefined(USE_64BIT_INDICES)) { 1115 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1116 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1117 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1118 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1119 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1120 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1121 PetscCall(PetscSFDestroy(&csf)); 1122 1123 /* Import indices: use direct map for rows and mapped indices for columns */ 1124 if (swap) { 1125 for (PetscInt k = 0; k < mumps->nnz; k++) { 1126 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1127 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1128 } 1129 } else { 1130 for (PetscInt k = 0; k < mumps->nnz; k++) { 1131 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1132 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1133 } 1134 } 1135 1136 /* Import values to full COO */ 1137 if (isHTrans) { /* conjugate the entries */ 1138 for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1139 } 1140 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1141 1142 /* Shift new starting point and sanity check */ 1143 cumnnz += mumps->nnz; 1144 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1145 1146 /* Free scratch memory */ 1147 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1148 PetscCall(PetscFree(mumps->val_alloc)); 1149 mumps->val = NULL; 1150 mumps->nnz = 0; 1151 } 1152 } 1153 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1154 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1155 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1156 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1157 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1158 mumps->nest_vals_start[nr * nc] = cumnnz; 1159 1160 /* Set pointers for final MUMPS data structure */ 1161 mumps->nest_vals = vals; 1162 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1163 mumps->val = vals; 1164 mumps->irn = irns; 1165 mumps->jcn = jcns; 1166 mumps->nnz = cumnnz; 1167 } else { 1168 PetscScalar *oval = mumps->nest_vals; 1169 for (PetscInt r = 0; r < nr; r++) { 1170 for (PetscInt c = 0; c < nc; c++) { 1171 PetscBool isTrans, isHTrans = PETSC_FALSE; 1172 Mat sub = mats[r][c]; 1173 PetscInt midx = r * nc + c; 1174 1175 if (!mumps->nest_convert_to_triples[midx]) continue; 1176 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1177 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1178 else { 1179 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1180 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1181 } 1182 mumps->val = oval + mumps->nest_vals_start[midx]; 1183 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1184 if (isHTrans) { 1185 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1186 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1187 } 1188 } 1189 } 1190 mumps->val = oval; 1191 } 1192 PetscFunctionReturn(PETSC_SUCCESS); 1193 } 1194 1195 PetscErrorCode MatDestroy_MUMPS(Mat A) 1196 { 1197 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1198 1199 PetscFunctionBegin; 1200 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1201 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1202 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1203 PetscCall(VecDestroy(&mumps->b_seq)); 1204 PetscCall(VecDestroy(&mumps->x_seq)); 1205 PetscCall(PetscFree(mumps->id.perm_in)); 1206 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1207 PetscCall(PetscFree(mumps->val_alloc)); 1208 PetscCall(PetscFree(mumps->info)); 1209 PetscCall(PetscFree(mumps->ICNTL_pre)); 1210 PetscCall(PetscFree(mumps->CNTL_pre)); 1211 PetscCall(MatMumpsResetSchur_Private(mumps)); 1212 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1213 mumps->id.job = JOB_END; 1214 PetscMUMPS_c(mumps); 1215 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1216 if (mumps->mumps_comm != MPI_COMM_NULL) { 1217 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1218 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1219 } 1220 } 1221 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1222 if (mumps->use_petsc_omp_support) { 1223 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1224 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1225 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1226 } 1227 #endif 1228 PetscCall(PetscFree(mumps->ia_alloc)); 1229 PetscCall(PetscFree(mumps->ja_alloc)); 1230 PetscCall(PetscFree(mumps->recvcount)); 1231 PetscCall(PetscFree(mumps->reqs)); 1232 PetscCall(PetscFree(mumps->irhs_loc)); 1233 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1234 PetscCall(PetscFree(mumps->nest_vals)); 1235 PetscCall(PetscFree(A->data)); 1236 1237 /* clear composed functions */ 1238 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1239 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1240 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1241 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1242 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1243 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1244 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1245 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1246 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1247 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1248 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1249 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1250 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1251 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1256 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1257 { 1258 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1259 const PetscMPIInt ompsize = mumps->omp_comm_size; 1260 PetscInt i, m, M, rstart; 1261 1262 PetscFunctionBegin; 1263 PetscCall(MatGetSize(A, &M, NULL)); 1264 PetscCall(MatGetLocalSize(A, &m, NULL)); 1265 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1266 if (ompsize == 1) { 1267 if (!mumps->irhs_loc) { 1268 mumps->nloc_rhs = m; 1269 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1270 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1271 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1272 } 1273 mumps->id.rhs_loc = (MumpsScalar *)array; 1274 } else { 1275 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1276 const PetscInt *ranges; 1277 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1278 MPI_Group petsc_group, omp_group; 1279 PetscScalar *recvbuf = NULL; 1280 1281 if (mumps->is_omp_master) { 1282 /* Lazily initialize the omp stuff for distributed rhs */ 1283 if (!mumps->irhs_loc) { 1284 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1285 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1286 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1287 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1288 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1289 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1290 1291 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1292 mumps->nloc_rhs = 0; 1293 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1294 for (j = 0; j < ompsize; j++) { 1295 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1296 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1297 } 1298 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1299 for (j = k = 0; j < ompsize; j++) { 1300 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1301 } 1302 1303 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1304 PetscCallMPI(MPI_Group_free(&petsc_group)); 1305 PetscCallMPI(MPI_Group_free(&omp_group)); 1306 } 1307 1308 /* Realloc buffers when current nrhs is bigger than what we have met */ 1309 if (nrhs > mumps->max_nrhs) { 1310 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1311 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1312 mumps->max_nrhs = nrhs; 1313 } 1314 1315 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1316 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1317 mumps->rhs_disps[0] = 0; 1318 for (j = 1; j < ompsize; j++) { 1319 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1320 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1321 } 1322 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1323 } 1324 1325 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1326 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1327 1328 if (mumps->is_omp_master) { 1329 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1330 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1331 for (j = 0; j < ompsize; j++) { 1332 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1333 dst = dstbase; 1334 for (i = 0; i < nrhs; i++) { 1335 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1336 src += mumps->rhs_nrow[j]; 1337 dst += mumps->nloc_rhs; 1338 } 1339 dstbase += mumps->rhs_nrow[j]; 1340 } 1341 } 1342 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1343 } 1344 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1345 } 1346 mumps->id.nrhs = nrhs; 1347 mumps->id.nloc_rhs = mumps->nloc_rhs; 1348 mumps->id.lrhs_loc = mumps->nloc_rhs; 1349 mumps->id.irhs_loc = mumps->irhs_loc; 1350 PetscFunctionReturn(PETSC_SUCCESS); 1351 } 1352 1353 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1354 { 1355 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1356 const PetscScalar *rarray = NULL; 1357 PetscScalar *array; 1358 IS is_iden, is_petsc; 1359 PetscInt i; 1360 PetscBool second_solve = PETSC_FALSE; 1361 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1362 1363 PetscFunctionBegin; 1364 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1365 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1366 &cite1)); 1367 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1368 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1369 &cite2)); 1370 1371 if (A->factorerrortype) { 1372 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1373 PetscCall(VecSetInf(x)); 1374 PetscFunctionReturn(PETSC_SUCCESS); 1375 } 1376 1377 mumps->id.nrhs = 1; 1378 if (mumps->petsc_size > 1) { 1379 if (mumps->ICNTL20 == 10) { 1380 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1381 PetscCall(VecGetArrayRead(b, &rarray)); 1382 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1383 } else { 1384 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1385 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1386 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1387 if (!mumps->myid) { 1388 PetscCall(VecGetArray(mumps->b_seq, &array)); 1389 mumps->id.rhs = (MumpsScalar *)array; 1390 } 1391 } 1392 } else { /* petsc_size == 1 */ 1393 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1394 PetscCall(VecCopy(b, x)); 1395 PetscCall(VecGetArray(x, &array)); 1396 mumps->id.rhs = (MumpsScalar *)array; 1397 } 1398 1399 /* 1400 handle condensation step of Schur complement (if any) 1401 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1402 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1403 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1404 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1405 */ 1406 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1407 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1408 second_solve = PETSC_TRUE; 1409 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1410 } 1411 /* solve phase */ 1412 mumps->id.job = JOB_SOLVE; 1413 PetscMUMPS_c(mumps); 1414 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1415 1416 /* handle expansion step of Schur complement (if any) */ 1417 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1418 1419 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1420 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1421 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1422 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1423 } 1424 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1425 PetscInt *isol2_loc = NULL; 1426 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1427 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1428 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1429 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1430 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1431 PetscCall(ISDestroy(&is_iden)); 1432 PetscCall(ISDestroy(&is_petsc)); 1433 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1434 } 1435 1436 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1438 } 1439 1440 if (mumps->petsc_size > 1) { 1441 if (mumps->ICNTL20 == 10) { 1442 PetscCall(VecRestoreArrayRead(b, &rarray)); 1443 } else if (!mumps->myid) { 1444 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1445 } 1446 } else PetscCall(VecRestoreArray(x, &array)); 1447 1448 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1449 PetscFunctionReturn(PETSC_SUCCESS); 1450 } 1451 1452 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1453 { 1454 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1455 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1456 1457 PetscFunctionBegin; 1458 mumps->id.ICNTL(9) = 0; 1459 PetscCall(MatSolve_MUMPS(A, b, x)); 1460 mumps->id.ICNTL(9) = value; 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1465 { 1466 Mat Bt = NULL; 1467 PetscBool denseX, denseB, flg, flgT; 1468 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1469 PetscInt i, nrhs, M; 1470 PetscScalar *array; 1471 const PetscScalar *rbray; 1472 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1473 PetscMUMPSInt *isol_loc, *isol_loc_save; 1474 PetscScalar *bray, *sol_loc, *sol_loc_save; 1475 IS is_to, is_from; 1476 PetscInt k, proc, j, m, myrstart; 1477 const PetscInt *rstart; 1478 Vec v_mpi, msol_loc; 1479 VecScatter scat_sol; 1480 Vec b_seq; 1481 VecScatter scat_rhs; 1482 PetscScalar *aa; 1483 PetscInt spnr, *ia, *ja; 1484 Mat_MPIAIJ *b = NULL; 1485 1486 PetscFunctionBegin; 1487 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1488 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1489 1490 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1491 if (denseB) { 1492 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1493 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1494 } else { /* sparse B */ 1495 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1496 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1497 if (flgT) { /* input B is transpose of actual RHS matrix, 1498 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1499 PetscCall(MatTransposeGetMat(B, &Bt)); 1500 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1501 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1502 } 1503 1504 PetscCall(MatGetSize(B, &M, &nrhs)); 1505 mumps->id.nrhs = nrhs; 1506 mumps->id.lrhs = M; 1507 mumps->id.rhs = NULL; 1508 1509 if (mumps->petsc_size == 1) { 1510 PetscScalar *aa; 1511 PetscInt spnr, *ia, *ja; 1512 PetscBool second_solve = PETSC_FALSE; 1513 1514 PetscCall(MatDenseGetArray(X, &array)); 1515 mumps->id.rhs = (MumpsScalar *)array; 1516 1517 if (denseB) { 1518 /* copy B to X */ 1519 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1520 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1521 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1522 } else { /* sparse B */ 1523 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1524 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1525 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1526 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1527 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1528 } 1529 /* handle condensation step of Schur complement (if any) */ 1530 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1531 second_solve = PETSC_TRUE; 1532 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1533 } 1534 /* solve phase */ 1535 mumps->id.job = JOB_SOLVE; 1536 PetscMUMPS_c(mumps); 1537 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1538 1539 /* handle expansion step of Schur complement (if any) */ 1540 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1541 if (!denseB) { /* sparse B */ 1542 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1543 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1544 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1545 } 1546 PetscCall(MatDenseRestoreArray(X, &array)); 1547 PetscFunctionReturn(PETSC_SUCCESS); 1548 } 1549 1550 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 1551 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1552 1553 /* create msol_loc to hold mumps local solution */ 1554 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1555 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1556 1557 lsol_loc = mumps->id.lsol_loc; 1558 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1559 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1560 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1561 mumps->id.isol_loc = isol_loc; 1562 1563 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1564 1565 if (denseB) { 1566 if (mumps->ICNTL20 == 10) { 1567 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1568 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1569 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1570 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1571 PetscCall(MatGetLocalSize(B, &m, NULL)); 1572 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1573 } else { 1574 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1575 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1576 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1577 0, re-arrange B into desired order, which is a local operation. 1578 */ 1579 1580 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1581 /* wrap dense rhs matrix B into a vector v_mpi */ 1582 PetscCall(MatGetLocalSize(B, &m, NULL)); 1583 PetscCall(MatDenseGetArray(B, &bray)); 1584 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1585 PetscCall(MatDenseRestoreArray(B, &bray)); 1586 1587 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1588 if (!mumps->myid) { 1589 PetscInt *idx; 1590 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1591 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1592 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1593 k = 0; 1594 for (proc = 0; proc < mumps->petsc_size; proc++) { 1595 for (j = 0; j < nrhs; j++) { 1596 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1597 } 1598 } 1599 1600 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1601 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1602 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1603 } else { 1604 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1605 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1606 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1607 } 1608 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1609 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1610 PetscCall(ISDestroy(&is_to)); 1611 PetscCall(ISDestroy(&is_from)); 1612 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1613 1614 if (!mumps->myid) { /* define rhs on the host */ 1615 PetscCall(VecGetArray(b_seq, &bray)); 1616 mumps->id.rhs = (MumpsScalar *)bray; 1617 PetscCall(VecRestoreArray(b_seq, &bray)); 1618 } 1619 } 1620 } else { /* sparse B */ 1621 b = (Mat_MPIAIJ *)Bt->data; 1622 1623 /* wrap dense X into a vector v_mpi */ 1624 PetscCall(MatGetLocalSize(X, &m, NULL)); 1625 PetscCall(MatDenseGetArray(X, &bray)); 1626 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1627 PetscCall(MatDenseRestoreArray(X, &bray)); 1628 1629 if (!mumps->myid) { 1630 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1631 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1632 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1633 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1634 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1635 } else { 1636 mumps->id.irhs_ptr = NULL; 1637 mumps->id.irhs_sparse = NULL; 1638 mumps->id.nz_rhs = 0; 1639 mumps->id.rhs_sparse = NULL; 1640 } 1641 } 1642 1643 /* solve phase */ 1644 mumps->id.job = JOB_SOLVE; 1645 PetscMUMPS_c(mumps); 1646 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1647 1648 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1649 PetscCall(MatDenseGetArray(X, &array)); 1650 PetscCall(VecPlaceArray(v_mpi, array)); 1651 1652 /* create scatter scat_sol */ 1653 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1654 /* iidx: index for scatter mumps solution to petsc X */ 1655 1656 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1657 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1658 for (i = 0; i < lsol_loc; i++) { 1659 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1660 1661 for (proc = 0; proc < mumps->petsc_size; proc++) { 1662 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1663 myrstart = rstart[proc]; 1664 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1665 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1666 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1667 break; 1668 } 1669 } 1670 1671 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1672 } 1673 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1674 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1675 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1676 PetscCall(ISDestroy(&is_from)); 1677 PetscCall(ISDestroy(&is_to)); 1678 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1679 PetscCall(MatDenseRestoreArray(X, &array)); 1680 1681 /* free spaces */ 1682 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1683 mumps->id.isol_loc = isol_loc_save; 1684 1685 PetscCall(PetscFree2(sol_loc, isol_loc)); 1686 PetscCall(PetscFree(idxx)); 1687 PetscCall(VecDestroy(&msol_loc)); 1688 PetscCall(VecDestroy(&v_mpi)); 1689 if (!denseB) { 1690 if (!mumps->myid) { 1691 b = (Mat_MPIAIJ *)Bt->data; 1692 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1693 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1694 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1695 } 1696 } else { 1697 if (mumps->ICNTL20 == 0) { 1698 PetscCall(VecDestroy(&b_seq)); 1699 PetscCall(VecScatterDestroy(&scat_rhs)); 1700 } 1701 } 1702 PetscCall(VecScatterDestroy(&scat_sol)); 1703 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1704 PetscFunctionReturn(PETSC_SUCCESS); 1705 } 1706 1707 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1708 { 1709 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1710 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1711 1712 PetscFunctionBegin; 1713 mumps->id.ICNTL(9) = 0; 1714 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1715 mumps->id.ICNTL(9) = value; 1716 PetscFunctionReturn(PETSC_SUCCESS); 1717 } 1718 1719 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1720 { 1721 PetscBool flg; 1722 Mat B; 1723 1724 PetscFunctionBegin; 1725 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1726 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1727 1728 /* Create B=Bt^T that uses Bt's data structure */ 1729 PetscCall(MatCreateTranspose(Bt, &B)); 1730 1731 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1732 PetscCall(MatDestroy(&B)); 1733 PetscFunctionReturn(PETSC_SUCCESS); 1734 } 1735 1736 #if !defined(PETSC_USE_COMPLEX) 1737 /* 1738 input: 1739 F: numeric factor 1740 output: 1741 nneg: total number of negative pivots 1742 nzero: total number of zero pivots 1743 npos: (global dimension of F) - nneg - nzero 1744 */ 1745 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1746 { 1747 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1748 PetscMPIInt size; 1749 1750 PetscFunctionBegin; 1751 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1752 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1753 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1754 1755 if (nneg) *nneg = mumps->id.INFOG(12); 1756 if (nzero || npos) { 1757 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1758 if (nzero) *nzero = mumps->id.INFOG(28); 1759 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1760 } 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 #endif 1764 1765 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1766 { 1767 PetscInt i, nreqs; 1768 PetscMUMPSInt *irn, *jcn; 1769 PetscMPIInt count; 1770 PetscInt64 totnnz, remain; 1771 const PetscInt osize = mumps->omp_comm_size; 1772 PetscScalar *val; 1773 1774 PetscFunctionBegin; 1775 if (osize > 1) { 1776 if (reuse == MAT_INITIAL_MATRIX) { 1777 /* master first gathers counts of nonzeros to receive */ 1778 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1779 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1780 1781 /* Then each computes number of send/recvs */ 1782 if (mumps->is_omp_master) { 1783 /* Start from 1 since self communication is not done in MPI */ 1784 nreqs = 0; 1785 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1786 } else { 1787 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1788 } 1789 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1790 1791 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1792 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1793 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1794 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1795 */ 1796 nreqs = 0; /* counter for actual send/recvs */ 1797 if (mumps->is_omp_master) { 1798 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1799 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1800 PetscCall(PetscMalloc1(totnnz, &val)); 1801 1802 /* Self communication */ 1803 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1804 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1805 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1806 1807 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1808 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1809 PetscCall(PetscFree(mumps->val_alloc)); 1810 mumps->nnz = totnnz; 1811 mumps->irn = irn; 1812 mumps->jcn = jcn; 1813 mumps->val = mumps->val_alloc = val; 1814 1815 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1816 jcn += mumps->recvcount[0]; 1817 val += mumps->recvcount[0]; 1818 1819 /* Remote communication */ 1820 for (i = 1; i < osize; i++) { 1821 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1822 remain = mumps->recvcount[i] - count; 1823 while (count > 0) { 1824 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1825 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1826 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1827 irn += count; 1828 jcn += count; 1829 val += count; 1830 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1831 remain -= count; 1832 } 1833 } 1834 } else { 1835 irn = mumps->irn; 1836 jcn = mumps->jcn; 1837 val = mumps->val; 1838 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1839 remain = mumps->nnz - count; 1840 while (count > 0) { 1841 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1842 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1843 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1844 irn += count; 1845 jcn += count; 1846 val += count; 1847 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1848 remain -= count; 1849 } 1850 } 1851 } else { 1852 nreqs = 0; 1853 if (mumps->is_omp_master) { 1854 val = mumps->val + mumps->recvcount[0]; 1855 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1856 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1857 remain = mumps->recvcount[i] - count; 1858 while (count > 0) { 1859 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1860 val += count; 1861 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1862 remain -= count; 1863 } 1864 } 1865 } else { 1866 val = mumps->val; 1867 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1868 remain = mumps->nnz - count; 1869 while (count > 0) { 1870 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1871 val += count; 1872 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1873 remain -= count; 1874 } 1875 } 1876 } 1877 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1878 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1884 { 1885 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1886 PetscBool isMPIAIJ; 1887 1888 PetscFunctionBegin; 1889 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1890 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1891 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1896 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1897 1898 /* numerical factorization phase */ 1899 mumps->id.job = JOB_FACTNUMERIC; 1900 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1901 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1902 } else { 1903 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1904 } 1905 PetscMUMPS_c(mumps); 1906 if (mumps->id.INFOG(1) < 0) { 1907 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1908 if (mumps->id.INFOG(1) == -10) { 1909 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1910 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1911 } else if (mumps->id.INFOG(1) == -13) { 1912 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1913 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1914 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1915 PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1916 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1917 } else { 1918 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1919 F->factorerrortype = MAT_FACTOR_OTHER; 1920 } 1921 } 1922 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 1923 1924 F->assembled = PETSC_TRUE; 1925 1926 if (F->schur) { /* reset Schur status to unfactored */ 1927 #if defined(PETSC_HAVE_CUDA) 1928 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1929 #endif 1930 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1931 mumps->id.ICNTL(19) = 2; 1932 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1933 } 1934 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1935 } 1936 1937 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1938 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1939 1940 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1941 if (mumps->petsc_size > 1) { 1942 PetscInt lsol_loc; 1943 PetscScalar *sol_loc; 1944 1945 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 1946 1947 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1948 if (mumps->x_seq) { 1949 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1950 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1951 PetscCall(VecDestroy(&mumps->x_seq)); 1952 } 1953 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1954 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 1955 mumps->id.lsol_loc = lsol_loc; 1956 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1957 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 1958 } 1959 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1960 PetscFunctionReturn(PETSC_SUCCESS); 1961 } 1962 1963 /* Sets MUMPS options from the options database */ 1964 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1965 { 1966 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1967 PetscMUMPSInt icntl = 0, size, *listvar_schur; 1968 PetscInt info[80], i, ninfo = 80, rbs, cbs; 1969 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 1970 MumpsScalar *arr; 1971 1972 PetscFunctionBegin; 1973 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 1974 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1975 PetscInt nthreads = 0; 1976 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1977 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1978 1979 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1980 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 1981 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 1982 1983 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 1984 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1985 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1986 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 1987 if (mumps->use_petsc_omp_support) { 1988 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 1989 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1990 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 1991 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1992 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 1993 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 1994 #endif 1995 } else { 1996 mumps->omp_comm = PETSC_COMM_SELF; 1997 mumps->mumps_comm = mumps->petsc_comm; 1998 mumps->is_omp_master = PETSC_TRUE; 1999 } 2000 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2001 mumps->reqs = NULL; 2002 mumps->tag = 0; 2003 2004 if (mumps->mumps_comm != MPI_COMM_NULL) { 2005 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2006 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2007 MPI_Comm comm; 2008 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2009 mumps->mumps_comm = comm; 2010 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2011 } 2012 2013 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2014 mumps->id.job = JOB_INIT; 2015 mumps->id.par = 1; /* host participates factorizaton and solve */ 2016 mumps->id.sym = mumps->sym; 2017 2018 size = mumps->id.size_schur; 2019 arr = mumps->id.schur; 2020 listvar_schur = mumps->id.listvar_schur; 2021 PetscMUMPS_c(mumps); 2022 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2023 /* restore cached ICNTL and CNTL values */ 2024 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2025 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2026 PetscCall(PetscFree(mumps->ICNTL_pre)); 2027 PetscCall(PetscFree(mumps->CNTL_pre)); 2028 2029 if (schur) { 2030 mumps->id.size_schur = size; 2031 mumps->id.schur_lld = size; 2032 mumps->id.schur = arr; 2033 mumps->id.listvar_schur = listvar_schur; 2034 if (mumps->petsc_size > 1) { 2035 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2036 2037 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2038 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2039 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2040 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2041 } else { 2042 if (F->factortype == MAT_FACTOR_LU) { 2043 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2044 } else { 2045 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2046 } 2047 } 2048 mumps->id.ICNTL(26) = -1; 2049 } 2050 2051 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2052 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2053 */ 2054 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2055 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2056 2057 mumps->scat_rhs = NULL; 2058 mumps->scat_sol = NULL; 2059 2060 /* set PETSc-MUMPS default options - override MUMPS default */ 2061 mumps->id.ICNTL(3) = 0; 2062 mumps->id.ICNTL(4) = 0; 2063 if (mumps->petsc_size == 1) { 2064 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2065 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2066 } else { 2067 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2068 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2069 } 2070 } 2071 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2072 if (flg) mumps->id.ICNTL(1) = icntl; 2073 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2074 if (flg) mumps->id.ICNTL(2) = icntl; 2075 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2076 if (flg) mumps->id.ICNTL(3) = icntl; 2077 2078 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2079 if (flg) mumps->id.ICNTL(4) = icntl; 2080 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2081 2082 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2083 if (flg) mumps->id.ICNTL(6) = icntl; 2084 2085 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2086 if (flg) { 2087 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2088 mumps->id.ICNTL(7) = icntl; 2089 } 2090 2091 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2092 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2093 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2094 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2095 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2096 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2097 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2098 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2099 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2100 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2101 if (flg) { 2102 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2103 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2104 } 2105 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2106 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2107 PetscCall(MatDestroy(&F->schur)); 2108 PetscCall(MatMumpsResetSchur_Private(mumps)); 2109 } 2110 2111 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2112 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2113 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2114 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2115 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2116 In short, we could not use distributed RHS with MPICH until v4.0b1. 2117 */ 2118 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2119 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2120 #else 2121 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2122 #endif 2123 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2124 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2125 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2126 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2127 #endif 2128 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2129 2130 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2131 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2132 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2133 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2134 2135 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2136 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2137 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2138 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2139 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2140 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2141 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2142 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2143 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2144 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2145 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2146 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2147 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2148 2149 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2150 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2151 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2152 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2153 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2154 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2155 2156 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2157 2158 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2159 if (ninfo) { 2160 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2161 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2162 mumps->ninfo = ninfo; 2163 for (i = 0; i < ninfo; i++) { 2164 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2165 mumps->info[i] = info[i]; 2166 } 2167 } 2168 PetscOptionsEnd(); 2169 PetscFunctionReturn(PETSC_SUCCESS); 2170 } 2171 2172 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2173 { 2174 PetscFunctionBegin; 2175 if (mumps->id.INFOG(1) < 0) { 2176 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2177 if (mumps->id.INFOG(1) == -6) { 2178 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2179 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2180 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2181 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2182 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2183 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2184 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2185 } else { 2186 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2187 F->factorerrortype = MAT_FACTOR_OTHER; 2188 } 2189 } 2190 PetscFunctionReturn(PETSC_SUCCESS); 2191 } 2192 2193 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2194 { 2195 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2196 Vec b; 2197 const PetscInt M = A->rmap->N; 2198 2199 PetscFunctionBegin; 2200 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2201 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2202 PetscFunctionReturn(PETSC_SUCCESS); 2203 } 2204 2205 /* Set MUMPS options from the options database */ 2206 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2207 2208 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2209 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2210 2211 /* analysis phase */ 2212 mumps->id.job = JOB_FACTSYMBOLIC; 2213 mumps->id.n = M; 2214 switch (mumps->id.ICNTL(18)) { 2215 case 0: /* centralized assembled matrix input */ 2216 if (!mumps->myid) { 2217 mumps->id.nnz = mumps->nnz; 2218 mumps->id.irn = mumps->irn; 2219 mumps->id.jcn = mumps->jcn; 2220 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2221 if (r) { 2222 mumps->id.ICNTL(7) = 1; 2223 if (!mumps->myid) { 2224 const PetscInt *idx; 2225 PetscInt i; 2226 2227 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2228 PetscCall(ISGetIndices(r, &idx)); 2229 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2230 PetscCall(ISRestoreIndices(r, &idx)); 2231 } 2232 } 2233 } 2234 break; 2235 case 3: /* distributed assembled matrix input (size>1) */ 2236 mumps->id.nnz_loc = mumps->nnz; 2237 mumps->id.irn_loc = mumps->irn; 2238 mumps->id.jcn_loc = mumps->jcn; 2239 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2240 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2241 PetscCall(MatCreateVecs(A, NULL, &b)); 2242 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2243 PetscCall(VecDestroy(&b)); 2244 } 2245 break; 2246 } 2247 PetscMUMPS_c(mumps); 2248 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2249 2250 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2251 F->ops->solve = MatSolve_MUMPS; 2252 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2253 F->ops->matsolve = MatMatSolve_MUMPS; 2254 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2255 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2256 2257 mumps->matstruc = SAME_NONZERO_PATTERN; 2258 PetscFunctionReturn(PETSC_SUCCESS); 2259 } 2260 2261 /* Note the Petsc r and c permutations are ignored */ 2262 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2263 { 2264 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2265 Vec b; 2266 const PetscInt M = A->rmap->N; 2267 2268 PetscFunctionBegin; 2269 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2270 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2271 PetscFunctionReturn(PETSC_SUCCESS); 2272 } 2273 2274 /* Set MUMPS options from the options database */ 2275 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2276 2277 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2278 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2279 2280 /* analysis phase */ 2281 mumps->id.job = JOB_FACTSYMBOLIC; 2282 mumps->id.n = M; 2283 switch (mumps->id.ICNTL(18)) { 2284 case 0: /* centralized assembled matrix input */ 2285 if (!mumps->myid) { 2286 mumps->id.nnz = mumps->nnz; 2287 mumps->id.irn = mumps->irn; 2288 mumps->id.jcn = mumps->jcn; 2289 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2290 } 2291 break; 2292 case 3: /* distributed assembled matrix input (size>1) */ 2293 mumps->id.nnz_loc = mumps->nnz; 2294 mumps->id.irn_loc = mumps->irn; 2295 mumps->id.jcn_loc = mumps->jcn; 2296 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2297 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2298 PetscCall(MatCreateVecs(A, NULL, &b)); 2299 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2300 PetscCall(VecDestroy(&b)); 2301 } 2302 break; 2303 } 2304 PetscMUMPS_c(mumps); 2305 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2306 2307 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2308 F->ops->solve = MatSolve_MUMPS; 2309 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2310 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2311 2312 mumps->matstruc = SAME_NONZERO_PATTERN; 2313 PetscFunctionReturn(PETSC_SUCCESS); 2314 } 2315 2316 /* Note the Petsc r permutation and factor info are ignored */ 2317 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2318 { 2319 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2320 Vec b; 2321 const PetscInt M = A->rmap->N; 2322 2323 PetscFunctionBegin; 2324 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2325 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2326 PetscFunctionReturn(PETSC_SUCCESS); 2327 } 2328 2329 /* Set MUMPS options from the options database */ 2330 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2331 2332 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2333 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2334 2335 /* analysis phase */ 2336 mumps->id.job = JOB_FACTSYMBOLIC; 2337 mumps->id.n = M; 2338 switch (mumps->id.ICNTL(18)) { 2339 case 0: /* centralized assembled matrix input */ 2340 if (!mumps->myid) { 2341 mumps->id.nnz = mumps->nnz; 2342 mumps->id.irn = mumps->irn; 2343 mumps->id.jcn = mumps->jcn; 2344 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2345 } 2346 break; 2347 case 3: /* distributed assembled matrix input (size>1) */ 2348 mumps->id.nnz_loc = mumps->nnz; 2349 mumps->id.irn_loc = mumps->irn; 2350 mumps->id.jcn_loc = mumps->jcn; 2351 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2352 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2353 PetscCall(MatCreateVecs(A, NULL, &b)); 2354 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2355 PetscCall(VecDestroy(&b)); 2356 } 2357 break; 2358 } 2359 PetscMUMPS_c(mumps); 2360 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2361 2362 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2363 F->ops->solve = MatSolve_MUMPS; 2364 F->ops->solvetranspose = MatSolve_MUMPS; 2365 F->ops->matsolve = MatMatSolve_MUMPS; 2366 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2367 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2368 #if defined(PETSC_USE_COMPLEX) 2369 F->ops->getinertia = NULL; 2370 #else 2371 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2372 #endif 2373 2374 mumps->matstruc = SAME_NONZERO_PATTERN; 2375 PetscFunctionReturn(PETSC_SUCCESS); 2376 } 2377 2378 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2379 { 2380 PetscBool iascii; 2381 PetscViewerFormat format; 2382 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2383 2384 PetscFunctionBegin; 2385 /* check if matrix is mumps type */ 2386 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2387 2388 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2389 if (iascii) { 2390 PetscCall(PetscViewerGetFormat(viewer, &format)); 2391 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2392 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2393 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2394 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2395 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2396 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2397 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2398 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2399 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2400 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2401 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2402 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2403 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2404 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2405 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2406 if (mumps->id.ICNTL(11) > 0) { 2407 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2408 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2409 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2410 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2411 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2412 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2413 } 2414 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2415 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2416 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2417 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2418 /* ICNTL(15-17) not used */ 2419 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2420 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2421 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2422 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2423 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2424 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2425 2426 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2427 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2428 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2429 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2430 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2431 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2432 2433 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2434 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2435 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2436 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2437 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2438 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2439 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2440 2441 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2442 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2443 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2444 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2445 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5))); 2446 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2447 2448 /* information local to each processor */ 2449 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2450 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2451 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2452 PetscCall(PetscViewerFlush(viewer)); 2453 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2454 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2455 PetscCall(PetscViewerFlush(viewer)); 2456 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2457 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2458 PetscCall(PetscViewerFlush(viewer)); 2459 2460 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2461 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2462 PetscCall(PetscViewerFlush(viewer)); 2463 2464 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2465 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2466 PetscCall(PetscViewerFlush(viewer)); 2467 2468 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2469 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2470 PetscCall(PetscViewerFlush(viewer)); 2471 2472 if (mumps->ninfo && mumps->ninfo <= 80) { 2473 PetscInt i; 2474 for (i = 0; i < mumps->ninfo; i++) { 2475 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2476 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2477 PetscCall(PetscViewerFlush(viewer)); 2478 } 2479 } 2480 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2481 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 2482 2483 if (mumps->myid == 0) { /* information from the host */ 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2486 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2487 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2488 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2491 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2494 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2495 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2503 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2510 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2511 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2513 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2514 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2515 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2517 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2518 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2520 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2521 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2522 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2523 } 2524 } 2525 } 2526 PetscFunctionReturn(PETSC_SUCCESS); 2527 } 2528 2529 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2530 { 2531 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2532 2533 PetscFunctionBegin; 2534 info->block_size = 1.0; 2535 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2536 info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2537 info->nz_unneeded = 0.0; 2538 info->assemblies = 0.0; 2539 info->mallocs = 0.0; 2540 info->memory = 0.0; 2541 info->fill_ratio_given = 0; 2542 info->fill_ratio_needed = 0; 2543 info->factor_mallocs = 0; 2544 PetscFunctionReturn(PETSC_SUCCESS); 2545 } 2546 2547 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2548 { 2549 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2550 const PetscScalar *arr; 2551 const PetscInt *idxs; 2552 PetscInt size, i; 2553 2554 PetscFunctionBegin; 2555 PetscCall(ISGetLocalSize(is, &size)); 2556 /* Schur complement matrix */ 2557 PetscCall(MatDestroy(&F->schur)); 2558 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2559 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2560 mumps->id.schur = (MumpsScalar *)arr; 2561 mumps->id.size_schur = size; 2562 mumps->id.schur_lld = size; 2563 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2564 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2565 2566 /* MUMPS expects Fortran style indices */ 2567 PetscCall(PetscFree(mumps->id.listvar_schur)); 2568 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2569 PetscCall(ISGetIndices(is, &idxs)); 2570 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2571 PetscCall(ISRestoreIndices(is, &idxs)); 2572 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2573 mumps->id.ICNTL(26) = -1; 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2578 { 2579 Mat St; 2580 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2581 PetscScalar *array; 2582 #if defined(PETSC_USE_COMPLEX) 2583 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2584 #endif 2585 2586 PetscFunctionBegin; 2587 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 2588 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2589 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2590 PetscCall(MatSetType(St, MATDENSE)); 2591 PetscCall(MatSetUp(St)); 2592 PetscCall(MatDenseGetArray(St, &array)); 2593 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2594 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2595 PetscInt i, j, N = mumps->id.size_schur; 2596 for (i = 0; i < N; i++) { 2597 for (j = 0; j < N; j++) { 2598 #if !defined(PETSC_USE_COMPLEX) 2599 PetscScalar val = mumps->id.schur[i * N + j]; 2600 #else 2601 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2602 #endif 2603 array[j * N + i] = val; 2604 } 2605 } 2606 } else { /* stored by columns */ 2607 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2608 } 2609 } else { /* either full or lower-triangular (not packed) */ 2610 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2611 PetscInt i, j, N = mumps->id.size_schur; 2612 for (i = 0; i < N; i++) { 2613 for (j = i; j < N; j++) { 2614 #if !defined(PETSC_USE_COMPLEX) 2615 PetscScalar val = mumps->id.schur[i * N + j]; 2616 #else 2617 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2618 #endif 2619 array[i * N + j] = val; 2620 array[j * N + i] = val; 2621 } 2622 } 2623 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2624 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2625 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2626 PetscInt i, j, N = mumps->id.size_schur; 2627 for (i = 0; i < N; i++) { 2628 for (j = 0; j < i + 1; j++) { 2629 #if !defined(PETSC_USE_COMPLEX) 2630 PetscScalar val = mumps->id.schur[i * N + j]; 2631 #else 2632 PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i; 2633 #endif 2634 array[i * N + j] = val; 2635 array[j * N + i] = val; 2636 } 2637 } 2638 } 2639 } 2640 PetscCall(MatDenseRestoreArray(St, &array)); 2641 *S = St; 2642 PetscFunctionReturn(PETSC_SUCCESS); 2643 } 2644 2645 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2646 { 2647 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2648 2649 PetscFunctionBegin; 2650 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2651 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2652 for (i = 0; i < nICNTL_pre; ++i) 2653 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2654 if (i == nICNTL_pre) { /* not already cached */ 2655 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2656 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2657 mumps->ICNTL_pre[0]++; 2658 } 2659 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2660 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2661 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2662 PetscFunctionReturn(PETSC_SUCCESS); 2663 } 2664 2665 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2666 { 2667 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2668 2669 PetscFunctionBegin; 2670 if (mumps->id.job == JOB_NULL) { 2671 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2672 *ival = 0; 2673 for (i = 0; i < nICNTL_pre; ++i) { 2674 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2675 } 2676 } else *ival = mumps->id.ICNTL(icntl); 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 /*@ 2681 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2682 2683 Logically Collective 2684 2685 Input Parameters: 2686 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2687 . icntl - index of MUMPS parameter array ICNTL() 2688 - ival - value of MUMPS ICNTL(icntl) 2689 2690 Options Database Key: 2691 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2692 2693 Level: beginner 2694 2695 References: 2696 . * - MUMPS Users' Guide 2697 2698 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2699 @*/ 2700 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2701 { 2702 PetscFunctionBegin; 2703 PetscValidType(F, 1); 2704 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2705 PetscValidLogicalCollectiveInt(F, icntl, 2); 2706 PetscValidLogicalCollectiveInt(F, ival, 3); 2707 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2708 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 /*@ 2713 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2714 2715 Logically Collective 2716 2717 Input Parameters: 2718 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2719 - icntl - index of MUMPS parameter array ICNTL() 2720 2721 Output Parameter: 2722 . ival - value of MUMPS ICNTL(icntl) 2723 2724 Level: beginner 2725 2726 References: 2727 . * - MUMPS Users' Guide 2728 2729 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2730 @*/ 2731 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2732 { 2733 PetscFunctionBegin; 2734 PetscValidType(F, 1); 2735 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2736 PetscValidLogicalCollectiveInt(F, icntl, 2); 2737 PetscAssertPointer(ival, 3); 2738 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2739 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2740 PetscFunctionReturn(PETSC_SUCCESS); 2741 } 2742 2743 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2744 { 2745 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2746 2747 PetscFunctionBegin; 2748 if (mumps->id.job == JOB_NULL) { 2749 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2750 for (i = 0; i < nCNTL_pre; ++i) 2751 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2752 if (i == nCNTL_pre) { 2753 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2754 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2755 mumps->CNTL_pre[0]++; 2756 } 2757 mumps->CNTL_pre[1 + 2 * i] = icntl; 2758 mumps->CNTL_pre[2 + 2 * i] = val; 2759 } else mumps->id.CNTL(icntl) = val; 2760 PetscFunctionReturn(PETSC_SUCCESS); 2761 } 2762 2763 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2764 { 2765 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2766 2767 PetscFunctionBegin; 2768 if (mumps->id.job == JOB_NULL) { 2769 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2770 *val = 0.0; 2771 for (i = 0; i < nCNTL_pre; ++i) { 2772 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2773 } 2774 } else *val = mumps->id.CNTL(icntl); 2775 PetscFunctionReturn(PETSC_SUCCESS); 2776 } 2777 2778 /*@ 2779 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2780 2781 Logically Collective 2782 2783 Input Parameters: 2784 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2785 . icntl - index of MUMPS parameter array CNTL() 2786 - val - value of MUMPS CNTL(icntl) 2787 2788 Options Database Key: 2789 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2790 2791 Level: beginner 2792 2793 References: 2794 . * - MUMPS Users' Guide 2795 2796 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2797 @*/ 2798 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2799 { 2800 PetscFunctionBegin; 2801 PetscValidType(F, 1); 2802 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2803 PetscValidLogicalCollectiveInt(F, icntl, 2); 2804 PetscValidLogicalCollectiveReal(F, val, 3); 2805 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2806 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2807 PetscFunctionReturn(PETSC_SUCCESS); 2808 } 2809 2810 /*@ 2811 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2812 2813 Logically Collective 2814 2815 Input Parameters: 2816 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2817 - icntl - index of MUMPS parameter array CNTL() 2818 2819 Output Parameter: 2820 . val - value of MUMPS CNTL(icntl) 2821 2822 Level: beginner 2823 2824 References: 2825 . * - MUMPS Users' Guide 2826 2827 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2828 @*/ 2829 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2830 { 2831 PetscFunctionBegin; 2832 PetscValidType(F, 1); 2833 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2834 PetscValidLogicalCollectiveInt(F, icntl, 2); 2835 PetscAssertPointer(val, 3); 2836 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2837 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2838 PetscFunctionReturn(PETSC_SUCCESS); 2839 } 2840 2841 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2842 { 2843 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2844 2845 PetscFunctionBegin; 2846 *info = mumps->id.INFO(icntl); 2847 PetscFunctionReturn(PETSC_SUCCESS); 2848 } 2849 2850 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2851 { 2852 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2853 2854 PetscFunctionBegin; 2855 *infog = mumps->id.INFOG(icntl); 2856 PetscFunctionReturn(PETSC_SUCCESS); 2857 } 2858 2859 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2860 { 2861 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2862 2863 PetscFunctionBegin; 2864 *rinfo = mumps->id.RINFO(icntl); 2865 PetscFunctionReturn(PETSC_SUCCESS); 2866 } 2867 2868 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2869 { 2870 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2871 2872 PetscFunctionBegin; 2873 *rinfog = mumps->id.RINFOG(icntl); 2874 PetscFunctionReturn(PETSC_SUCCESS); 2875 } 2876 2877 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2878 { 2879 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2880 2881 PetscFunctionBegin; 2882 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2883 *size = 0; 2884 *array = NULL; 2885 if (!mumps->myid) { 2886 *size = mumps->id.INFOG(28); 2887 PetscCall(PetscMalloc1(*size, array)); 2888 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2889 } 2890 PetscFunctionReturn(PETSC_SUCCESS); 2891 } 2892 2893 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2894 { 2895 Mat Bt = NULL, Btseq = NULL; 2896 PetscBool flg; 2897 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2898 PetscScalar *aa; 2899 PetscInt spnr, *ia, *ja, M, nrhs; 2900 2901 PetscFunctionBegin; 2902 PetscAssertPointer(spRHS, 2); 2903 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2904 if (flg) { 2905 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2906 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2907 2908 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2909 2910 if (mumps->petsc_size > 1) { 2911 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2912 Btseq = b->A; 2913 } else { 2914 Btseq = Bt; 2915 } 2916 2917 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2918 mumps->id.nrhs = nrhs; 2919 mumps->id.lrhs = M; 2920 mumps->id.rhs = NULL; 2921 2922 if (!mumps->myid) { 2923 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2924 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2925 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2926 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2927 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2928 } else { 2929 mumps->id.irhs_ptr = NULL; 2930 mumps->id.irhs_sparse = NULL; 2931 mumps->id.nz_rhs = 0; 2932 mumps->id.rhs_sparse = NULL; 2933 } 2934 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2935 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2936 2937 /* solve phase */ 2938 mumps->id.job = JOB_SOLVE; 2939 PetscMUMPS_c(mumps); 2940 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2941 2942 if (!mumps->myid) { 2943 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2944 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2945 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2946 } 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 /*@ 2951 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2952 2953 Logically Collective 2954 2955 Input Parameter: 2956 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2957 2958 Output Parameter: 2959 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2960 2961 Level: beginner 2962 2963 References: 2964 . * - MUMPS Users' Guide 2965 2966 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2967 @*/ 2968 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2969 { 2970 PetscFunctionBegin; 2971 PetscValidType(F, 1); 2972 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2973 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2974 PetscFunctionReturn(PETSC_SUCCESS); 2975 } 2976 2977 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2978 { 2979 Mat spRHS; 2980 2981 PetscFunctionBegin; 2982 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2983 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2984 PetscCall(MatDestroy(&spRHS)); 2985 PetscFunctionReturn(PETSC_SUCCESS); 2986 } 2987 2988 /*@ 2989 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2990 2991 Logically Collective 2992 2993 Input Parameter: 2994 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2995 2996 Output Parameter: 2997 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2998 2999 Level: beginner 3000 3001 References: 3002 . * - MUMPS Users' Guide 3003 3004 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3005 @*/ 3006 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3007 { 3008 PetscBool flg; 3009 3010 PetscFunctionBegin; 3011 PetscValidType(F, 1); 3012 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3013 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3014 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3015 3016 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3017 PetscFunctionReturn(PETSC_SUCCESS); 3018 } 3019 3020 /*@ 3021 MatMumpsGetInfo - Get MUMPS parameter INFO() 3022 3023 Logically Collective 3024 3025 Input Parameters: 3026 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3027 - icntl - index of MUMPS parameter array INFO() 3028 3029 Output Parameter: 3030 . ival - value of MUMPS INFO(icntl) 3031 3032 Level: beginner 3033 3034 References: 3035 . * - MUMPS Users' Guide 3036 3037 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3038 @*/ 3039 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3040 { 3041 PetscFunctionBegin; 3042 PetscValidType(F, 1); 3043 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3044 PetscAssertPointer(ival, 3); 3045 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3046 PetscFunctionReturn(PETSC_SUCCESS); 3047 } 3048 3049 /*@ 3050 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3051 3052 Logically Collective 3053 3054 Input Parameters: 3055 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3056 - icntl - index of MUMPS parameter array INFOG() 3057 3058 Output Parameter: 3059 . ival - value of MUMPS INFOG(icntl) 3060 3061 Level: beginner 3062 3063 References: 3064 . * - MUMPS Users' Guide 3065 3066 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3067 @*/ 3068 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3069 { 3070 PetscFunctionBegin; 3071 PetscValidType(F, 1); 3072 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3073 PetscAssertPointer(ival, 3); 3074 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3075 PetscFunctionReturn(PETSC_SUCCESS); 3076 } 3077 3078 /*@ 3079 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3080 3081 Logically Collective 3082 3083 Input Parameters: 3084 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3085 - icntl - index of MUMPS parameter array RINFO() 3086 3087 Output Parameter: 3088 . val - value of MUMPS RINFO(icntl) 3089 3090 Level: beginner 3091 3092 References: 3093 . * - MUMPS Users' Guide 3094 3095 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3096 @*/ 3097 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3098 { 3099 PetscFunctionBegin; 3100 PetscValidType(F, 1); 3101 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3102 PetscAssertPointer(val, 3); 3103 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3104 PetscFunctionReturn(PETSC_SUCCESS); 3105 } 3106 3107 /*@ 3108 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3109 3110 Logically Collective 3111 3112 Input Parameters: 3113 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3114 - icntl - index of MUMPS parameter array RINFOG() 3115 3116 Output Parameter: 3117 . val - value of MUMPS RINFOG(icntl) 3118 3119 Level: beginner 3120 3121 References: 3122 . * - MUMPS Users' Guide 3123 3124 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3125 @*/ 3126 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3127 { 3128 PetscFunctionBegin; 3129 PetscValidType(F, 1); 3130 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3131 PetscAssertPointer(val, 3); 3132 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3133 PetscFunctionReturn(PETSC_SUCCESS); 3134 } 3135 3136 /*@ 3137 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3138 3139 Logically Collective 3140 3141 Input Parameter: 3142 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3143 3144 Output Parameters: 3145 + size - local size of the array. The size of the array is non-zero only on the host. 3146 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3147 for freeing this array. 3148 3149 Level: beginner 3150 3151 References: 3152 . * - MUMPS Users' Guide 3153 3154 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3155 @*/ 3156 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3157 { 3158 PetscFunctionBegin; 3159 PetscValidType(F, 1); 3160 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3161 PetscAssertPointer(size, 2); 3162 PetscAssertPointer(array, 3); 3163 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3164 PetscFunctionReturn(PETSC_SUCCESS); 3165 } 3166 3167 /*MC 3168 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3169 distributed and sequential matrices via the external package MUMPS. 3170 3171 Works with `MATAIJ` and `MATSBAIJ` matrices 3172 3173 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3174 3175 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3176 See details below. 3177 3178 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3179 3180 Options Database Keys: 3181 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3182 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3183 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3184 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3185 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3186 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3187 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3188 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3189 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3190 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3191 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3192 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3193 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3194 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3195 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3196 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3197 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3198 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3199 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3200 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3201 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3202 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 3203 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3204 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3205 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3206 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3207 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3208 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3209 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3210 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3211 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3212 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3213 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3214 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3215 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3216 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3217 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3218 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3219 3220 Level: beginner 3221 3222 Notes: 3223 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3224 error if the matrix is Hermitian. 3225 3226 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3227 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3228 3229 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3230 the failure with 3231 .vb 3232 KSPGetPC(ksp,&pc); 3233 PCFactorGetMatrix(pc,&mat); 3234 MatMumpsGetInfo(mat,....); 3235 MatMumpsGetInfog(mat,....); etc. 3236 .ve 3237 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3238 3239 MUMPS provides 64-bit integer support in two build modes: 3240 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3241 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3242 3243 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3244 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3245 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3246 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3247 3248 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3249 3250 Two modes to run MUMPS/PETSc with OpenMP 3251 .vb 3252 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3253 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3254 .ve 3255 3256 .vb 3257 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3258 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3259 .ve 3260 3261 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3262 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3263 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3264 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3265 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 3266 3267 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3268 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3269 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3270 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3271 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3272 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3273 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3274 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3275 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3276 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3277 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3278 examine the mapping result. 3279 3280 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3281 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3282 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3283 3284 References: 3285 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3286 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3287 3288 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3289 M*/ 3290 3291 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3292 { 3293 PetscFunctionBegin; 3294 *type = MATSOLVERMUMPS; 3295 PetscFunctionReturn(PETSC_SUCCESS); 3296 } 3297 3298 /* MatGetFactor for Seq and MPI AIJ matrices */ 3299 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3300 { 3301 Mat B; 3302 Mat_MUMPS *mumps; 3303 PetscBool isSeqAIJ; 3304 PetscMPIInt size; 3305 3306 PetscFunctionBegin; 3307 #if defined(PETSC_USE_COMPLEX) 3308 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3309 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3310 *F = NULL; 3311 PetscFunctionReturn(PETSC_SUCCESS); 3312 } 3313 #endif 3314 /* Create the factorization matrix */ 3315 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3316 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3317 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3318 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3319 PetscCall(MatSetUp(B)); 3320 3321 PetscCall(PetscNew(&mumps)); 3322 3323 B->ops->view = MatView_MUMPS; 3324 B->ops->getinfo = MatGetInfo_MUMPS; 3325 3326 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3327 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3328 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3329 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3330 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3331 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3332 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3333 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3334 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3335 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3336 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3337 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3338 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3339 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3340 3341 if (ftype == MAT_FACTOR_LU) { 3342 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3343 B->factortype = MAT_FACTOR_LU; 3344 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3345 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3346 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3347 mumps->sym = 0; 3348 } else { 3349 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3350 B->factortype = MAT_FACTOR_CHOLESKY; 3351 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3352 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3353 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3354 #if defined(PETSC_USE_COMPLEX) 3355 mumps->sym = 2; 3356 #else 3357 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3358 else mumps->sym = 2; 3359 #endif 3360 } 3361 3362 /* set solvertype */ 3363 PetscCall(PetscFree(B->solvertype)); 3364 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3365 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3366 if (size == 1) { 3367 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3368 B->canuseordering = PETSC_TRUE; 3369 } 3370 B->ops->destroy = MatDestroy_MUMPS; 3371 B->data = (void *)mumps; 3372 3373 *F = B; 3374 mumps->id.job = JOB_NULL; 3375 mumps->ICNTL_pre = NULL; 3376 mumps->CNTL_pre = NULL; 3377 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3378 PetscFunctionReturn(PETSC_SUCCESS); 3379 } 3380 3381 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3382 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3383 { 3384 Mat B; 3385 Mat_MUMPS *mumps; 3386 PetscBool isSeqSBAIJ; 3387 PetscMPIInt size; 3388 3389 PetscFunctionBegin; 3390 #if defined(PETSC_USE_COMPLEX) 3391 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3392 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3393 *F = NULL; 3394 PetscFunctionReturn(PETSC_SUCCESS); 3395 } 3396 #endif 3397 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3398 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3399 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3400 PetscCall(MatSetUp(B)); 3401 3402 PetscCall(PetscNew(&mumps)); 3403 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3404 if (isSeqSBAIJ) { 3405 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3406 } else { 3407 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3408 } 3409 3410 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3411 B->ops->view = MatView_MUMPS; 3412 B->ops->getinfo = MatGetInfo_MUMPS; 3413 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3417 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3418 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3419 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3420 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3421 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3422 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3423 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3424 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3425 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3426 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3427 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3428 3429 B->factortype = MAT_FACTOR_CHOLESKY; 3430 #if defined(PETSC_USE_COMPLEX) 3431 mumps->sym = 2; 3432 #else 3433 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3434 else mumps->sym = 2; 3435 #endif 3436 3437 /* set solvertype */ 3438 PetscCall(PetscFree(B->solvertype)); 3439 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3440 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3441 if (size == 1) { 3442 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3443 B->canuseordering = PETSC_TRUE; 3444 } 3445 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3446 B->ops->destroy = MatDestroy_MUMPS; 3447 B->data = (void *)mumps; 3448 3449 *F = B; 3450 mumps->id.job = JOB_NULL; 3451 mumps->ICNTL_pre = NULL; 3452 mumps->CNTL_pre = NULL; 3453 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3454 PetscFunctionReturn(PETSC_SUCCESS); 3455 } 3456 3457 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3458 { 3459 Mat B; 3460 Mat_MUMPS *mumps; 3461 PetscBool isSeqBAIJ; 3462 PetscMPIInt size; 3463 3464 PetscFunctionBegin; 3465 /* Create the factorization matrix */ 3466 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3467 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3468 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3469 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3470 PetscCall(MatSetUp(B)); 3471 3472 PetscCall(PetscNew(&mumps)); 3473 if (ftype == MAT_FACTOR_LU) { 3474 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3475 B->factortype = MAT_FACTOR_LU; 3476 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3477 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3478 mumps->sym = 0; 3479 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3480 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3481 3482 B->ops->view = MatView_MUMPS; 3483 B->ops->getinfo = MatGetInfo_MUMPS; 3484 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3492 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3493 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3494 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3495 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3496 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3497 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3498 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3499 3500 /* set solvertype */ 3501 PetscCall(PetscFree(B->solvertype)); 3502 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3503 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3504 if (size == 1) { 3505 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3506 B->canuseordering = PETSC_TRUE; 3507 } 3508 B->ops->destroy = MatDestroy_MUMPS; 3509 B->data = (void *)mumps; 3510 3511 *F = B; 3512 mumps->id.job = JOB_NULL; 3513 mumps->ICNTL_pre = NULL; 3514 mumps->CNTL_pre = NULL; 3515 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3516 PetscFunctionReturn(PETSC_SUCCESS); 3517 } 3518 3519 /* MatGetFactor for Seq and MPI SELL matrices */ 3520 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3521 { 3522 Mat B; 3523 Mat_MUMPS *mumps; 3524 PetscBool isSeqSELL; 3525 PetscMPIInt size; 3526 3527 PetscFunctionBegin; 3528 /* Create the factorization matrix */ 3529 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3530 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3531 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3532 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3533 PetscCall(MatSetUp(B)); 3534 3535 PetscCall(PetscNew(&mumps)); 3536 3537 B->ops->view = MatView_MUMPS; 3538 B->ops->getinfo = MatGetInfo_MUMPS; 3539 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3542 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3544 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3549 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3552 3553 if (ftype == MAT_FACTOR_LU) { 3554 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3555 B->factortype = MAT_FACTOR_LU; 3556 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3557 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3558 mumps->sym = 0; 3559 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3560 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3561 3562 /* set solvertype */ 3563 PetscCall(PetscFree(B->solvertype)); 3564 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3565 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3566 if (size == 1) { 3567 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3568 B->canuseordering = PETSC_TRUE; 3569 } 3570 B->ops->destroy = MatDestroy_MUMPS; 3571 B->data = (void *)mumps; 3572 3573 *F = B; 3574 mumps->id.job = JOB_NULL; 3575 mumps->ICNTL_pre = NULL; 3576 mumps->CNTL_pre = NULL; 3577 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3578 PetscFunctionReturn(PETSC_SUCCESS); 3579 } 3580 3581 /* MatGetFactor for MATNEST matrices */ 3582 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3583 { 3584 Mat B, **mats; 3585 Mat_MUMPS *mumps; 3586 PetscInt nr, nc; 3587 PetscMPIInt size; 3588 PetscBool flg = PETSC_TRUE; 3589 3590 PetscFunctionBegin; 3591 #if defined(PETSC_USE_COMPLEX) 3592 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3593 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3594 *F = NULL; 3595 PetscFunctionReturn(PETSC_SUCCESS); 3596 } 3597 #endif 3598 3599 /* Return if some condition is not satisfied */ 3600 *F = NULL; 3601 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3602 if (ftype == MAT_FACTOR_CHOLESKY) { 3603 IS *rows, *cols; 3604 PetscInt *m, *M; 3605 3606 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3607 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3608 PetscCall(MatNestGetISs(A, rows, cols)); 3609 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3610 if (!flg) { 3611 PetscCall(PetscFree2(rows, cols)); 3612 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3613 PetscFunctionReturn(PETSC_SUCCESS); 3614 } 3615 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3616 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3617 for (PetscInt r = 0; flg && r < nr; r++) 3618 for (PetscInt k = r + 1; flg && k < nr; k++) 3619 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3620 PetscCall(PetscFree2(m, M)); 3621 PetscCall(PetscFree2(rows, cols)); 3622 if (!flg) { 3623 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3624 PetscFunctionReturn(PETSC_SUCCESS); 3625 } 3626 } 3627 3628 for (PetscInt r = 0; r < nr; r++) { 3629 for (PetscInt c = 0; c < nc; c++) { 3630 Mat sub = mats[r][c]; 3631 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 3632 3633 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3634 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3635 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3636 else { 3637 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3638 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3639 } 3640 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3641 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3642 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3643 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3644 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3645 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3646 if (ftype == MAT_FACTOR_CHOLESKY) { 3647 if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3648 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3649 flg = PETSC_FALSE; 3650 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3651 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3652 flg = PETSC_FALSE; 3653 } 3654 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3655 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3656 flg = PETSC_FALSE; 3657 } 3658 } 3659 } 3660 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3661 3662 /* Create the factorization matrix */ 3663 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3664 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3665 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3666 PetscCall(MatSetUp(B)); 3667 3668 PetscCall(PetscNew(&mumps)); 3669 3670 B->ops->view = MatView_MUMPS; 3671 B->ops->getinfo = MatGetInfo_MUMPS; 3672 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3674 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3675 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3676 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3677 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3678 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3679 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3680 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3681 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3682 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3683 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3684 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3685 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3686 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3687 3688 if (ftype == MAT_FACTOR_LU) { 3689 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3690 B->factortype = MAT_FACTOR_LU; 3691 mumps->sym = 0; 3692 } else { 3693 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3694 B->factortype = MAT_FACTOR_CHOLESKY; 3695 #if defined(PETSC_USE_COMPLEX) 3696 mumps->sym = 2; 3697 #else 3698 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3699 else mumps->sym = 2; 3700 #endif 3701 } 3702 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3703 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3704 3705 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3706 if (size == 1) { 3707 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3708 B->canuseordering = PETSC_TRUE; 3709 } 3710 3711 /* set solvertype */ 3712 PetscCall(PetscFree(B->solvertype)); 3713 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3714 B->ops->destroy = MatDestroy_MUMPS; 3715 B->data = (void *)mumps; 3716 3717 *F = B; 3718 mumps->id.job = JOB_NULL; 3719 mumps->ICNTL_pre = NULL; 3720 mumps->CNTL_pre = NULL; 3721 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3722 PetscFunctionReturn(PETSC_SUCCESS); 3723 } 3724 3725 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3726 { 3727 PetscFunctionBegin; 3728 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3729 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3730 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3731 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3732 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3733 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3734 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3735 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3736 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3737 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3738 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 3739 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 3740 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 3741 PetscFunctionReturn(PETSC_SUCCESS); 3742 } 3743