1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <petscsf.h> 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 9 #include <../src/mat/impls/sell/mpi/mpisell.h> 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #if defined(PETSC_USE_REAL_SINGLE) 14 #include <cmumps_c.h> 15 #else 16 #include <zmumps_c.h> 17 #endif 18 #else 19 #if defined(PETSC_USE_REAL_SINGLE) 20 #include <smumps_c.h> 21 #else 22 #include <dmumps_c.h> 23 #endif 24 #endif 25 EXTERN_C_END 26 #define JOB_INIT -1 27 #define JOB_NULL 0 28 #define JOB_FACTSYMBOLIC 1 29 #define JOB_FACTNUMERIC 2 30 #define JOB_SOLVE 3 31 #define JOB_END -2 32 33 /* calls to MUMPS */ 34 #if defined(PETSC_USE_COMPLEX) 35 #if defined(PETSC_USE_REAL_SINGLE) 36 #define MUMPS_c cmumps_c 37 #else 38 #define MUMPS_c zmumps_c 39 #endif 40 #else 41 #if defined(PETSC_USE_REAL_SINGLE) 42 #define MUMPS_c smumps_c 43 #else 44 #define MUMPS_c dmumps_c 45 #endif 46 #endif 47 48 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 49 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 50 naming convention in PetscMPIInt, PetscBLASInt etc. 
*/
typedef MUMPS_INT PetscMUMPSInt;

/* Refuse to build against a MUMPS that was compiled in full 64-bit integer mode; PETSc has not been tested with that configuration */
#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  /* only with 64-bit PetscInt can the value fall outside PetscMUMPSInt's 32-bit range */
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
    } else { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { \
    PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
    PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
    PetscCall(PetscFPTrapPop()); \
  } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

/* Private data for the MUMPS factorization; stored in the factor Mat's ->data */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;             /* 0: unsymmetric, 1: SPD, 2: general symmetric (MUMPS SYM parameter) */
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscInt64  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt
   to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* with 32-bit PetscInt the arrays already have the right element size and can be passed through */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free the Schur-complement related arrays held in the MUMPS structure and reset ICNTL(19) */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  /* B wraps the MUMPS reduced rhs array redrhs as a dense matrix of the same type as S */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs storage as B, so the triangular solves overwrite redrhs in place */
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds the explicit inverse; grow the scratch solution buffer if needed and multiply */
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    /* copy the product back into redrhs (aliased by B) */
    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;

  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drive the MUMPS condensation (expansion = PETSC_FALSE) or expansion (PETSC_TRUE) phase, ICNTL(26), around a Schur-complement solve */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to
    triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
 */

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  /* the AIJ value array can be handed to MUMPS directly, no copy/buffer needed */
  mumps->val = (PetscScalar *)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* SELL groups rows in slices of 8: r cycles 0..7 within slice i, so the global row is 8*i + r */
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  M          = A->rmap->N / bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i;
    aj = aa->j;
    nz = bs2 * aa->nz;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        /* expand block (i, ajj[k]) into bs*bs scalar entries, in the storage order of aa->a */
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscInt64      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if
defined(PETSC_USE_COMPLEX) 471 PetscBool isset, hermitian; 472 #endif 473 474 PetscFunctionBegin; 475 #if defined(PETSC_USE_COMPLEX) 476 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 477 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 478 #endif 479 ai = aa->i; 480 aj = aa->j; 481 PetscCall(MatGetBlockSize(A, &bs)); 482 if (reuse == MAT_INITIAL_MATRIX) { 483 const PetscInt64 alloc_size = aa->nz * bs2; 484 485 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 486 if (bs > 1) { 487 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 488 mumps->val = mumps->val_alloc; 489 } else { 490 mumps->val = aa->a; 491 } 492 mumps->irn = row; 493 mumps->jcn = col; 494 } else { 495 if (bs == 1) mumps->val = aa->a; 496 row = mumps->irn; 497 col = mumps->jcn; 498 } 499 val = mumps->val; 500 501 nz = 0; 502 if (bs > 1) { 503 for (i = 0; i < mbs; i++) { 504 rnz = ai[i + 1] - ai[i]; 505 ajj = aj + ai[i]; 506 for (j = 0; j < rnz; j++) { 507 for (k = 0; k < bs; k++) { 508 for (m = 0; m < bs; m++) { 509 if (ajj[j] > i || k >= m) { 510 if (reuse == MAT_INITIAL_MATRIX) { 511 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 512 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 513 } 514 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 515 } 516 } 517 } 518 } 519 } 520 } else if (reuse == MAT_INITIAL_MATRIX) { 521 for (i = 0; i < mbs; i++) { 522 rnz = ai[i + 1] - ai[i]; 523 ajj = aj + ai[i]; 524 for (j = 0; j < rnz; j++) { 525 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 526 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 527 nz++; 528 } 529 } 530 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 531 } 532 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 533 PetscFunctionReturn(PETSC_SUCCESS); 534 } 535 536 PetscErrorCode 
MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 537 { 538 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 539 PetscInt64 nz, rnz, i, j; 540 const PetscScalar *av, *v1; 541 PetscScalar *val; 542 PetscMUMPSInt *row, *col; 543 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 544 PetscBool missing; 545 #if defined(PETSC_USE_COMPLEX) 546 PetscBool hermitian, isset; 547 #endif 548 549 PetscFunctionBegin; 550 #if defined(PETSC_USE_COMPLEX) 551 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 552 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 553 #endif 554 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 555 ai = aa->i; 556 aj = aa->j; 557 adiag = aa->diag; 558 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 559 if (reuse == MAT_INITIAL_MATRIX) { 560 /* count nz in the upper triangular part of A */ 561 nz = 0; 562 if (missing) { 563 for (i = 0; i < M; i++) { 564 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 565 for (j = ai[i]; j < ai[i + 1]; j++) { 566 if (aj[j] < i) continue; 567 nz++; 568 } 569 } else { 570 nz += ai[i + 1] - adiag[i]; 571 } 572 } 573 } else { 574 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 575 } 576 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 577 PetscCall(PetscMalloc1(nz, &val)); 578 mumps->nnz = nz; 579 mumps->irn = row; 580 mumps->jcn = col; 581 mumps->val = mumps->val_alloc = val; 582 583 nz = 0; 584 if (missing) { 585 for (i = 0; i < M; i++) { 586 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 587 for (j = ai[i]; j < ai[i + 1]; j++) { 588 if (aj[j] < i) continue; 589 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 590 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 591 val[nz] = av[j]; 592 nz++; 593 } 594 } else { 595 rnz = ai[i + 1] - adiag[i]; 596 ajj = aj + adiag[i]; 597 v1 = av + adiag[i]; 598 for (j = 0; j < rnz; j++) { 599 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 600 
PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 601 val[nz++] = v1[j]; 602 } 603 } 604 } 605 } else { 606 for (i = 0; i < M; i++) { 607 rnz = ai[i + 1] - adiag[i]; 608 ajj = aj + adiag[i]; 609 v1 = av + adiag[i]; 610 for (j = 0; j < rnz; j++) { 611 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 612 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 613 val[nz++] = v1[j]; 614 } 615 } 616 } 617 } else { 618 nz = 0; 619 val = mumps->val; 620 if (missing) { 621 for (i = 0; i < M; i++) { 622 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 623 for (j = ai[i]; j < ai[i + 1]; j++) { 624 if (aj[j] < i) continue; 625 val[nz++] = av[j]; 626 } 627 } else { 628 rnz = ai[i + 1] - adiag[i]; 629 v1 = av + adiag[i]; 630 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 631 } 632 } 633 } else { 634 for (i = 0; i < M; i++) { 635 rnz = ai[i + 1] - adiag[i]; 636 v1 = av + adiag[i]; 637 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 638 } 639 } 640 } 641 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 642 PetscFunctionReturn(PETSC_SUCCESS); 643 } 644 645 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 646 { 647 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 648 PetscInt bs; 649 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 650 PetscMUMPSInt *row, *col; 651 const PetscScalar *av, *bv, *v1, *v2; 652 PetscScalar *val; 653 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 654 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 655 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 656 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 657 #if defined(PETSC_USE_COMPLEX) 658 PetscBool hermitian, isset; 659 #endif 660 661 PetscFunctionBegin; 662 #if defined(PETSC_USE_COMPLEX) 663 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 664 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 665 #endif 666 
PetscCall(MatGetBlockSize(A, &bs)); 667 rstart = A->rmap->rstart; 668 ai = aa->i; 669 aj = aa->j; 670 bi = bb->i; 671 bj = bb->j; 672 av = aa->a; 673 bv = bb->a; 674 675 garray = mat->garray; 676 677 if (reuse == MAT_INITIAL_MATRIX) { 678 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 679 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 680 PetscCall(PetscMalloc1(nz, &val)); 681 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 682 mumps->irn = row; 683 mumps->jcn = col; 684 mumps->val = mumps->val_alloc = val; 685 } else { 686 val = mumps->val; 687 } 688 689 jj = 0; 690 irow = rstart; 691 for (i = 0; i < mbs; i++) { 692 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 693 countA = ai[i + 1] - ai[i]; 694 countB = bi[i + 1] - bi[i]; 695 bjj = bj + bi[i]; 696 v1 = av + ai[i] * bs2; 697 v2 = bv + bi[i] * bs2; 698 699 if (bs > 1) { 700 /* A-part */ 701 for (j = 0; j < countA; j++) { 702 for (k = 0; k < bs; k++) { 703 for (m = 0; m < bs; m++) { 704 if (rstart + ajj[j] * bs > irow || k >= m) { 705 if (reuse == MAT_INITIAL_MATRIX) { 706 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 707 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 708 } 709 val[jj++] = v1[j * bs2 + m + k * bs]; 710 } 711 } 712 } 713 } 714 715 /* B-part */ 716 for (j = 0; j < countB; j++) { 717 for (k = 0; k < bs; k++) { 718 for (m = 0; m < bs; m++) { 719 if (reuse == MAT_INITIAL_MATRIX) { 720 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 721 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 722 } 723 val[jj++] = v2[j * bs2 + m + k * bs]; 724 } 725 } 726 } 727 } else { 728 /* A-part */ 729 for (j = 0; j < countA; j++) { 730 if (reuse == MAT_INITIAL_MATRIX) { 731 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 732 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 733 } 734 val[jj++] = v1[j]; 735 } 736 737 /* B-part */ 738 for (j = 0; j < countB; j++) { 739 if (reuse == 
MAT_INITIAL_MATRIX) { 740 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 741 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 742 } 743 val[jj++] = v2[j]; 744 } 745 } 746 irow += bs; 747 } 748 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 753 { 754 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 755 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 756 PetscMUMPSInt *row, *col; 757 const PetscScalar *av, *bv, *v1, *v2; 758 PetscScalar *val; 759 Mat Ad, Ao; 760 Mat_SeqAIJ *aa; 761 Mat_SeqAIJ *bb; 762 763 PetscFunctionBegin; 764 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 765 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 766 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 767 768 aa = (Mat_SeqAIJ *)(Ad)->data; 769 bb = (Mat_SeqAIJ *)(Ao)->data; 770 ai = aa->i; 771 aj = aa->j; 772 bi = bb->i; 773 bj = bb->j; 774 775 rstart = A->rmap->rstart; 776 cstart = A->cmap->rstart; 777 778 if (reuse == MAT_INITIAL_MATRIX) { 779 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 780 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 781 PetscCall(PetscMalloc1(nz, &val)); 782 mumps->nnz = nz; 783 mumps->irn = row; 784 mumps->jcn = col; 785 mumps->val = mumps->val_alloc = val; 786 } else { 787 val = mumps->val; 788 } 789 790 jj = 0; 791 irow = rstart; 792 for (i = 0; i < m; i++) { 793 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 794 countA = ai[i + 1] - ai[i]; 795 countB = bi[i + 1] - bi[i]; 796 bjj = bj + bi[i]; 797 v1 = av + ai[i]; 798 v2 = bv + bi[i]; 799 800 /* A-part */ 801 for (j = 0; j < countA; j++) { 802 if (reuse == MAT_INITIAL_MATRIX) { 803 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 804 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 805 } 806 val[jj++] = v1[j]; 807 } 808 809 /* B-part 
*/ 810 for (j = 0; j < countB; j++) { 811 if (reuse == MAT_INITIAL_MATRIX) { 812 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 813 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 814 } 815 val[jj++] = v2[j]; 816 } 817 irow++; 818 } 819 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 820 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 821 PetscFunctionReturn(PETSC_SUCCESS); 822 } 823 824 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 825 { 826 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 827 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 828 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 829 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 830 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 831 const PetscInt bs2 = mat->bs2; 832 PetscInt bs; 833 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 834 PetscMUMPSInt *row, *col; 835 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 836 PetscScalar *val; 837 838 PetscFunctionBegin; 839 PetscCall(MatGetBlockSize(A, &bs)); 840 if (reuse == MAT_INITIAL_MATRIX) { 841 nz = bs2 * (aa->nz + bb->nz); 842 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 843 PetscCall(PetscMalloc1(nz, &val)); 844 mumps->nnz = nz; 845 mumps->irn = row; 846 mumps->jcn = col; 847 mumps->val = mumps->val_alloc = val; 848 } else { 849 val = mumps->val; 850 } 851 852 jj = 0; 853 irow = rstart; 854 for (i = 0; i < mbs; i++) { 855 countA = ai[i + 1] - ai[i]; 856 countB = bi[i + 1] - bi[i]; 857 ajj = aj + ai[i]; 858 bjj = bj + bi[i]; 859 v1 = av + bs2 * ai[i]; 860 v2 = bv + bs2 * bi[i]; 861 862 idx = 0; 863 /* A-part */ 864 for (k = 0; k < countA; k++) { 865 for (j = 0; j < bs; j++) { 866 for (n = 0; n < bs; n++) { 867 if (reuse == MAT_INITIAL_MATRIX) { 868 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 869 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, 
&col[jj])); 870 } 871 val[jj++] = v1[idx++]; 872 } 873 } 874 } 875 876 idx = 0; 877 /* B-part */ 878 for (k = 0; k < countB; k++) { 879 for (j = 0; j < bs; j++) { 880 for (n = 0; n < bs; n++) { 881 if (reuse == MAT_INITIAL_MATRIX) { 882 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 883 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 884 } 885 val[jj++] = v2[idx++]; 886 } 887 } 888 } 889 irow += bs; 890 } 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 895 { 896 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 897 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 898 PetscMUMPSInt *row, *col; 899 const PetscScalar *av, *bv, *v1, *v2; 900 PetscScalar *val; 901 Mat Ad, Ao; 902 Mat_SeqAIJ *aa; 903 Mat_SeqAIJ *bb; 904 #if defined(PETSC_USE_COMPLEX) 905 PetscBool hermitian, isset; 906 #endif 907 908 PetscFunctionBegin; 909 #if defined(PETSC_USE_COMPLEX) 910 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 911 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 912 #endif 913 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 914 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 915 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 916 917 aa = (Mat_SeqAIJ *)(Ad)->data; 918 bb = (Mat_SeqAIJ *)(Ao)->data; 919 ai = aa->i; 920 aj = aa->j; 921 adiag = aa->diag; 922 bi = bb->i; 923 bj = bb->j; 924 925 rstart = A->rmap->rstart; 926 927 if (reuse == MAT_INITIAL_MATRIX) { 928 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 929 nzb = 0; /* num of upper triangular entries in mat->B */ 930 for (i = 0; i < m; i++) { 931 nza += (ai[i + 1] - adiag[i]); 932 countB = bi[i + 1] - bi[i]; 933 bjj = bj + bi[i]; 934 for (j = 0; j < countB; j++) { 935 if (garray[bjj[j]] > rstart) 
nzb++; 936 } 937 } 938 939 nz = nza + nzb; /* total nz of upper triangular part of mat */ 940 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 941 PetscCall(PetscMalloc1(nz, &val)); 942 mumps->nnz = nz; 943 mumps->irn = row; 944 mumps->jcn = col; 945 mumps->val = mumps->val_alloc = val; 946 } else { 947 val = mumps->val; 948 } 949 950 jj = 0; 951 irow = rstart; 952 for (i = 0; i < m; i++) { 953 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 954 v1 = av + adiag[i]; 955 countA = ai[i + 1] - adiag[i]; 956 countB = bi[i + 1] - bi[i]; 957 bjj = bj + bi[i]; 958 v2 = bv + bi[i]; 959 960 /* A-part */ 961 for (j = 0; j < countA; j++) { 962 if (reuse == MAT_INITIAL_MATRIX) { 963 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 964 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 965 } 966 val[jj++] = v1[j]; 967 } 968 969 /* B-part */ 970 for (j = 0; j < countB; j++) { 971 if (garray[bjj[j]] > rstart) { 972 if (reuse == MAT_INITIAL_MATRIX) { 973 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 974 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 975 } 976 val[jj++] = v2[j]; 977 } 978 } 979 irow++; 980 } 981 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 982 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 983 PetscFunctionReturn(PETSC_SUCCESS); 984 } 985 986 PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 987 { 988 Mat **mats; 989 PetscInt nr, nc; 990 PetscBool chol = mumps->sym ? 
/*
  Convert a MATNEST matrix to MUMPS COO triples by converting each sub-block with the
  appropriate per-type routine, then remapping the block-local indices into the global
  ordering of the nest matrix. For Cholesky (mumps->sym != 0) only the block
  upper-triangular part is assembled. On MAT_REUSE_MATRIX only the values are refreshed.
*/
PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; /* symmetric (Cholesky-style) factorization requested */

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscInt64     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    /* First pass: pick a conversion routine per sub-block and count nonzeros */
    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE;
          MatInfo   info;

          /* Unwrap (Hermitian-)transpose wrappers; rows/columns roles are swapped in the second pass */
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
          if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
          else {
            PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
            if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
          }
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));

          if (chol) {
            if (r == c) { /* diagonal blocks contribute only their upper-triangular part */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
            } else { /* off-diagonal (upper) blocks are used in full */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    /* scratch buffer for column indices; with 32-bit PetscInt we alias mumps->jcn directly instead */
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    /* Second pass: convert each block and merge its triples into the global COO arrays */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        /* overwrite each block-local column index with the nest-global index it maps to */
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (isHTrans) { /* conjugate the entries */
          for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory used by the per-block conversion */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    /* for Cholesky the first-pass count may overestimate, so equality is only enforced otherwise */
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* MAT_REUSE_MATRIX: indices are unchanged, refresh values block by block in place */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat       sub  = mats[r][c];
        PetscInt  midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        /* point mumps->val at this block's slice of the merged values array */
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans) { /* conjugate the entries */
          PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval; /* restore pointer to the beginning of the merged values */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  Destroy a MUMPS factor matrix: release all PETSc-side scratch data, shut down the
  MUMPS instance (JOB_END) if it was ever initialized, release the duplicated
  communicator, and clear the composed query/set functions.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    /* terminate the MUMPS instance before giving back its communicator */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the comm was either MPI_Comm_dup'ed (OMP path) or obtained from PetscCommGetComm; release accordingly */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
   With omp_comm_size == 1 the local array is used directly; otherwise the RHS pieces of all
   ranks in the omp communicator are gathered to the omp master (which is the rank that talks
   to MUMPS) and re-arranged column by column. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  /* global row indices are stored in PetscMUMPSInt (32-bit); refuse matrices too large for that */
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      /* lazily build the row-index map for the local RHS chunk */
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        /* translate omp-communicator ranks into petsc-communicator ranks to look up ownership ranges */
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          /* each sender's chunk holds nrhs contiguous columns of length rhs_nrow[j];
             interleave them so columns of the merged RHS are contiguous with stride nloc_rhs */
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  /* publish the distributed-RHS description to the MUMPS id structure */
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  Solve A x = b with the MUMPS factors. Handles three RHS layouts (distributed dense,
  centralized dense, and the uniprocessor in-place case), the optional Schur-complement
  condensation/expansion, and the scatter of the MUMPS distributed solution back to x.
*/
PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
                                   "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
                                   &cite2));

  if (A->factorerrortype) {
    /* factorization failed earlier: flag the solution as invalid instead of calling MUMPS */
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* solve in place: x starts as a copy of b and MUMPS overwrites it with the solution */
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                     /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  /* restore whichever RHS array was taken above */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
SCATTER_FORWARD)); 1435 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1436 } 1437 1438 if (mumps->petsc_size > 1) { 1439 if (mumps->ICNTL20 == 10) { 1440 PetscCall(VecRestoreArrayRead(b, &rarray)); 1441 } else if (!mumps->myid) { 1442 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1443 } 1444 } else PetscCall(VecRestoreArray(x, &array)); 1445 1446 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1447 PetscFunctionReturn(PETSC_SUCCESS); 1448 } 1449 1450 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1451 { 1452 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1453 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1454 1455 PetscFunctionBegin; 1456 mumps->id.ICNTL(9) = 0; 1457 PetscCall(MatSolve_MUMPS(A, b, x)); 1458 mumps->id.ICNTL(9) = value; 1459 PetscFunctionReturn(PETSC_SUCCESS); 1460 } 1461 1462 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1463 { 1464 Mat Bt = NULL; 1465 PetscBool denseX, denseB, flg, flgT; 1466 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1467 PetscInt i, nrhs, M; 1468 PetscScalar *array; 1469 const PetscScalar *rbray; 1470 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1471 PetscMUMPSInt *isol_loc, *isol_loc_save; 1472 PetscScalar *bray, *sol_loc, *sol_loc_save; 1473 IS is_to, is_from; 1474 PetscInt k, proc, j, m, myrstart; 1475 const PetscInt *rstart; 1476 Vec v_mpi, msol_loc; 1477 VecScatter scat_sol; 1478 Vec b_seq; 1479 VecScatter scat_rhs; 1480 PetscScalar *aa; 1481 PetscInt spnr, *ia, *ja; 1482 Mat_MPIAIJ *b = NULL; 1483 1484 PetscFunctionBegin; 1485 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1486 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1487 1488 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1489 if (denseB) { 1490 
PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1491 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1492 } else { /* sparse B */ 1493 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1494 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1495 if (flgT) { /* input B is transpose of actual RHS matrix, 1496 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1497 PetscCall(MatTransposeGetMat(B, &Bt)); 1498 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1499 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1500 } 1501 1502 PetscCall(MatGetSize(B, &M, &nrhs)); 1503 mumps->id.nrhs = nrhs; 1504 mumps->id.lrhs = M; 1505 mumps->id.rhs = NULL; 1506 1507 if (mumps->petsc_size == 1) { 1508 PetscScalar *aa; 1509 PetscInt spnr, *ia, *ja; 1510 PetscBool second_solve = PETSC_FALSE; 1511 1512 PetscCall(MatDenseGetArray(X, &array)); 1513 mumps->id.rhs = (MumpsScalar *)array; 1514 1515 if (denseB) { 1516 /* copy B to X */ 1517 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1518 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1519 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1520 } else { /* sparse B */ 1521 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1522 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1523 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1524 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1525 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1526 } 1527 /* handle condensation step of Schur complement (if any) */ 1528 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1529 second_solve = PETSC_TRUE; 1530 
PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1531 } 1532 /* solve phase */ 1533 mumps->id.job = JOB_SOLVE; 1534 PetscMUMPS_c(mumps); 1535 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1536 1537 /* handle expansion step of Schur complement (if any) */ 1538 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1539 if (!denseB) { /* sparse B */ 1540 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1541 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1542 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1543 } 1544 PetscCall(MatDenseRestoreArray(X, &array)); 1545 PetscFunctionReturn(PETSC_SUCCESS); 1546 } 1547 1548 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 1549 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1550 1551 /* create msol_loc to hold mumps local solution */ 1552 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1553 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1554 1555 lsol_loc = mumps->id.lsol_loc; 1556 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1557 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1558 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1559 mumps->id.isol_loc = isol_loc; 1560 1561 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1562 1563 if (denseB) { 1564 if (mumps->ICNTL20 == 10) { 1565 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1566 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1567 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1568 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1569 PetscCall(MatGetLocalSize(B, &m, NULL)); 1570 
PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1571 } else { 1572 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1573 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1574 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1575 0, re-arrange B into desired order, which is a local operation. 1576 */ 1577 1578 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1579 /* wrap dense rhs matrix B into a vector v_mpi */ 1580 PetscCall(MatGetLocalSize(B, &m, NULL)); 1581 PetscCall(MatDenseGetArray(B, &bray)); 1582 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1583 PetscCall(MatDenseRestoreArray(B, &bray)); 1584 1585 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1586 if (!mumps->myid) { 1587 PetscInt *idx; 1588 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1589 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1590 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1591 k = 0; 1592 for (proc = 0; proc < mumps->petsc_size; proc++) { 1593 for (j = 0; j < nrhs; j++) { 1594 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1595 } 1596 } 1597 1598 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1599 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1600 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1601 } else { 1602 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1603 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1604 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1605 } 1606 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1607 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1608 PetscCall(ISDestroy(&is_to)); 1609 PetscCall(ISDestroy(&is_from)); 1610 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1611 1612 if (!mumps->myid) { /* define rhs on the host */ 1613 PetscCall(VecGetArray(b_seq, &bray)); 1614 mumps->id.rhs = (MumpsScalar *)bray; 1615 PetscCall(VecRestoreArray(b_seq, &bray)); 1616 } 1617 } 1618 } else { /* sparse B */ 1619 b = (Mat_MPIAIJ *)Bt->data; 1620 1621 /* wrap dense X into a vector v_mpi */ 1622 PetscCall(MatGetLocalSize(X, &m, NULL)); 1623 PetscCall(MatDenseGetArray(X, &bray)); 1624 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1625 PetscCall(MatDenseRestoreArray(X, &bray)); 1626 1627 if (!mumps->myid) { 1628 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1629 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1630 PetscCheck(flg, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1631 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1632 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1633 } else { 1634 mumps->id.irhs_ptr = NULL; 1635 mumps->id.irhs_sparse = NULL; 1636 mumps->id.nz_rhs = 0; 1637 mumps->id.rhs_sparse = NULL; 1638 } 1639 } 1640 1641 /* solve phase */ 1642 mumps->id.job = JOB_SOLVE; 1643 PetscMUMPS_c(mumps); 1644 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1)); 1645 1646 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1647 PetscCall(MatDenseGetArray(X, &array)); 1648 PetscCall(VecPlaceArray(v_mpi, array)); 1649 1650 /* create scatter scat_sol */ 1651 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1652 /* iidx: index for scatter mumps solution to petsc X */ 1653 1654 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1655 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1656 for (i = 0; i < lsol_loc; i++) { 1657 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1658 1659 for (proc = 0; proc < mumps->petsc_size; proc++) { 1660 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1661 myrstart = rstart[proc]; 1662 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1663 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1664 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1665 break; 1666 } 1667 } 1668 1669 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1670 } 1671 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1672 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1673 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1674 PetscCall(ISDestroy(&is_from)); 1675 PetscCall(ISDestroy(&is_to)); 1676 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1677 PetscCall(MatDenseRestoreArray(X, &array)); 1678 1679 /* free spaces */ 1680 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1681 mumps->id.isol_loc = isol_loc_save; 1682 1683 PetscCall(PetscFree2(sol_loc, isol_loc)); 1684 PetscCall(PetscFree(idxx)); 1685 PetscCall(VecDestroy(&msol_loc)); 1686 PetscCall(VecDestroy(&v_mpi)); 1687 if (!denseB) { 1688 if (!mumps->myid) { 1689 b = (Mat_MPIAIJ *)Bt->data; 1690 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1691 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1692 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1693 } 1694 } else { 1695 if (mumps->ICNTL20 == 0) { 1696 PetscCall(VecDestroy(&b_seq)); 1697 PetscCall(VecScatterDestroy(&scat_rhs)); 1698 } 1699 } 1700 PetscCall(VecScatterDestroy(&scat_sol)); 1701 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1706 { 1707 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1708 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1709 1710 PetscFunctionBegin; 1711 mumps->id.ICNTL(9) = 0; 1712 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1713 mumps->id.ICNTL(9) = value; 1714 PetscFunctionReturn(PETSC_SUCCESS); 1715 } 1716 1717 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1718 { 1719 PetscBool flg; 1720 Mat B; 1721 1722 PetscFunctionBegin; 1723 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1724 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1725 1726 /* Create B=Bt^T that uses Bt's data structure */ 1727 PetscCall(MatCreateTranspose(Bt, &B)); 1728 1729 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1730 PetscCall(MatDestroy(&B)); 1731 PetscFunctionReturn(PETSC_SUCCESS); 1732 } 1733 1734 #if !defined(PETSC_USE_COMPLEX) 1735 /* 1736 input: 1737 F: numeric factor 1738 output: 1739 nneg: total number of negative pivots 1740 nzero: total number of zero pivots 1741 npos: (global dimension of F) - nneg - nzero 1742 */ 1743 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1744 { 1745 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1746 PetscMPIInt size; 1747 1748 PetscFunctionBegin; 1749 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1750 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1751 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1752 1753 if (nneg) *nneg = mumps->id.INFOG(12); 1754 if (nzero || npos) { 1755 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1756 if (nzero) *nzero = mumps->id.INFOG(28); 1757 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1758 } 1759 PetscFunctionReturn(PETSC_SUCCESS); 1760 } 1761 #endif 1762 1763 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1764 { 1765 PetscInt i, nreqs; 1766 PetscMUMPSInt *irn, *jcn; 1767 PetscMPIInt count; 1768 PetscInt64 totnnz, remain; 1769 const PetscInt osize = mumps->omp_comm_size; 1770 PetscScalar *val; 1771 1772 PetscFunctionBegin; 1773 if (osize > 1) { 1774 if (reuse == MAT_INITIAL_MATRIX) { 1775 /* master first gathers counts of nonzeros to receive */ 1776 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1777 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1778 1779 /* Then each computes number of send/recvs */ 1780 if (mumps->is_omp_master) { 1781 /* Start from 1 since self communication is not done in MPI */ 1782 nreqs = 0; 1783 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1784 } else { 1785 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1786 } 1787 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1788 1789 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1790 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1791 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size 1792 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1793 */ 1794 nreqs = 0; /* counter for actual send/recvs */ 1795 if (mumps->is_omp_master) { 1796 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1797 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1798 PetscCall(PetscMalloc1(totnnz, &val)); 1799 1800 /* Self communication */ 1801 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1802 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1803 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1804 1805 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1806 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1807 PetscCall(PetscFree(mumps->val_alloc)); 1808 mumps->nnz = totnnz; 1809 mumps->irn = irn; 1810 mumps->jcn = jcn; 1811 mumps->val = mumps->val_alloc = val; 1812 1813 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1814 jcn += mumps->recvcount[0]; 1815 val += mumps->recvcount[0]; 1816 1817 /* Remote communication */ 1818 for (i = 1; i < osize; i++) { 1819 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1820 remain = mumps->recvcount[i] - count; 1821 while (count > 0) { 1822 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1823 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1824 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1825 irn += count; 1826 jcn += count; 1827 val += count; 1828 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1829 remain -= count; 1830 } 1831 } 1832 } else { 1833 irn = mumps->irn; 1834 jcn = mumps->jcn; 1835 val = mumps->val; 1836 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1837 remain = mumps->nnz - count; 1838 while (count > 0) { 1839 
PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1840 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1841 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1842 irn += count; 1843 jcn += count; 1844 val += count; 1845 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1846 remain -= count; 1847 } 1848 } 1849 } else { 1850 nreqs = 0; 1851 if (mumps->is_omp_master) { 1852 val = mumps->val + mumps->recvcount[0]; 1853 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1854 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1855 remain = mumps->recvcount[i] - count; 1856 while (count > 0) { 1857 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1858 val += count; 1859 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1860 remain -= count; 1861 } 1862 } 1863 } else { 1864 val = mumps->val; 1865 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1866 remain = mumps->nnz - count; 1867 while (count > 0) { 1868 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1869 val += count; 1870 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1871 remain -= count; 1872 } 1873 } 1874 } 1875 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1876 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1877 } 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) 1882 { 1883 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1884 PetscBool isMPIAIJ; 1885 1886 PetscFunctionBegin; 1887 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1888 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with 
singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1889 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1890 PetscFunctionReturn(PETSC_SUCCESS); 1891 } 1892 1893 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1894 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1895 1896 /* numerical factorization phase */ 1897 mumps->id.job = JOB_FACTNUMERIC; 1898 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1899 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1900 } else { 1901 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1902 } 1903 PetscMUMPS_c(mumps); 1904 if (mumps->id.INFOG(1) < 0) { 1905 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 1906 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1907 PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1908 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1909 } else if (mumps->id.INFOG(1) == -13) { 1910 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1911 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1912 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1913 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1914 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1915 } else { 1916 PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1917 
      F->factorerrortype = MAT_FACTOR_OTHER;
    }
  }
  /* NOTE(review): condition tests ICNTL(16) but the printed value is INFOG(16) — confirm against the MUMPS manual whether this mismatch is intentional */
  PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, " mumps->id.ICNTL(16):=%d", mumps->id.INFOG(16));

  F->assembled = PETSC_TRUE;

  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
    }
    PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
  }

  /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt     lsol_loc;
    PetscScalar *sol_loc;

    /* isMPIAIJ appears unused below — presumably kept for historical reasons; verify before removing */
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));

    /* distributed solution; Create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
      PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
      PetscCall(VecDestroy(&mumps->x_seq));
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
  }
  PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Sets MUMPS options from the options database */
PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
{
  Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
  PetscMUMPSInt icntl = 0, size, *listvar_schur;
  PetscInt      info[80], i, ninfo = 80, rbs, cbs;
  PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
  MumpsScalar  *arr;

  PetscFunctionBegin;
  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
  if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
    PetscInt nthreads   = 0;
    PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;

    mumps->petsc_comm = PetscObjectComm((PetscObject)A);
    PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
    PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */

    PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
    if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
    /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
    PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
    if (mumps->use_petsc_omp_support) {
      PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
                 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
      PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
      PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
      PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
#endif
    } else {
      mumps->omp_comm      = PETSC_COMM_SELF;
      mumps->mumps_comm    = mumps->petsc_comm;
      mumps->is_omp_master = PETSC_TRUE;
    }
    PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
    mumps->reqs = NULL;
    mumps->tag  = 0;

    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
        /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
        MPI_Comm comm;
        PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
        mumps->mumps_comm = comm;
      } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
    }

    mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
    mumps->id.job          = JOB_INIT;
    mumps->id.par          = 1; /* host participates factorizaton and solve */
    mumps->id.sym          = mumps->sym;

    /* save Schur settings across JOB_INIT, which resets the id structure */
    size          = mumps->id.size_schur;
    arr           = mumps->id.schur;
    listvar_schur = mumps->id.listvar_schur;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
    /* restore cached ICNTL and CNTL values */
    for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
    for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
    PetscCall(PetscFree(mumps->ICNTL_pre));
    PetscCall(PetscFree(mumps->CNTL_pre));

    if (schur) {
      mumps->id.size_schur    = size;
      mumps->id.schur_lld     = size;
      mumps->id.schur         = arr;
      mumps->id.listvar_schur = listvar_schur;
      if (mumps->petsc_size > 1) {
        PetscBool gs; /* gs is false if any rank other than root has non-empty IS */

        mumps->id.ICNTL(19) = 1;                                                                  /* MUMPS returns Schur centralized on the host */
        gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
        PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
        PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
      } else {
        if (F->factortype == MAT_FACTOR_LU) {
          mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
        } else {
          mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
        }
      }
      mumps->id.ICNTL(26) = -1;
    }

    /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
       For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
     */
    PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
    PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));

    mumps->scat_rhs = NULL;
    mumps->scat_sol = NULL;

    /* set PETSc-MUMPS default options - override MUMPS default */
    mumps->id.ICNTL(3) = 0;
    mumps->id.ICNTL(4) = 0;
    if (mumps->petsc_size == 1) {
      mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
      mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
    } else {
      mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
      mumps->id.ICNTL(21) = 1; /* distributed solution */
    }
  }
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
  if (flg) mumps->id.ICNTL(1) = icntl;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
  if (flg) mumps->id.ICNTL(2) = icntl;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
  if (flg) mumps->id.ICNTL(3) = icntl;

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
  if (flg) mumps->id.ICNTL(6) = icntl;

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
  if (flg) {
    PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
    mumps->id.ICNTL(7) = icntl;
  }

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
  /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
  PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
  if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
  if (flg) {
    PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
    PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
  }
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
    PetscCall(MatDestroy(&F->schur));
    PetscCall(MatMumpsResetSchur_Private(mumps));
  }

  /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
     and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
     and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
     This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
     see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
     In short, we could not use distributed RHS with MPICH until v4.0b1.
   */
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
  mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
#else
  mumps->ICNTL20 = 10; /* Distributed dense RHS*/
#endif
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
  PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
  PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
#endif
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
  if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));

  /* real-valued CNTL parameters use PetscOptionsReal() since they are not integers */
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2150 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2151 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2152 2153 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2154 2155 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2156 if (ninfo) { 2157 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2158 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2159 mumps->ninfo = ninfo; 2160 for (i = 0; i < ninfo; i++) { 2161 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2162 mumps->info[i] = info[i]; 2163 } 2164 } 2165 PetscOptionsEnd(); 2166 PetscFunctionReturn(PETSC_SUCCESS); 2167 } 2168 2169 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2170 { 2171 PetscFunctionBegin; 2172 if (mumps->id.INFOG(1) < 0) { 2173 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1)); 2174 if (mumps->id.INFOG(1) == -6) { 2175 PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2176 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2177 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2178 PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), 
mumps->id.INFO(2))); 2179 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2180 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2181 PetscCall(PetscInfo(F, "Empty matrix\n")); 2182 } else { 2183 PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2184 F->factorerrortype = MAT_FACTOR_OTHER; 2185 } 2186 } 2187 PetscFunctionReturn(PETSC_SUCCESS); 2188 } 2189 2190 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2191 { 2192 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2193 Vec b; 2194 const PetscInt M = A->rmap->N; 2195 2196 PetscFunctionBegin; 2197 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2198 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2199 PetscFunctionReturn(PETSC_SUCCESS); 2200 } 2201 2202 /* Set MUMPS options from the options database */ 2203 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2204 2205 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2206 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2207 2208 /* analysis phase */ 2209 mumps->id.job = JOB_FACTSYMBOLIC; 2210 mumps->id.n = M; 2211 switch (mumps->id.ICNTL(18)) { 2212 case 0: /* centralized assembled matrix input */ 2213 if (!mumps->myid) { 2214 mumps->id.nnz = mumps->nnz; 2215 mumps->id.irn = mumps->irn; 2216 mumps->id.jcn = mumps->jcn; 2217 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2218 if (r) { 2219 mumps->id.ICNTL(7) = 1; 2220 if (!mumps->myid) { 2221 const PetscInt *idx; 2222 PetscInt i; 2223 2224 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2225 PetscCall(ISGetIndices(r, &idx)); 2226 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! 
*/ 2227 PetscCall(ISRestoreIndices(r, &idx)); 2228 } 2229 } 2230 } 2231 break; 2232 case 3: /* distributed assembled matrix input (size>1) */ 2233 mumps->id.nnz_loc = mumps->nnz; 2234 mumps->id.irn_loc = mumps->irn; 2235 mumps->id.jcn_loc = mumps->jcn; 2236 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2237 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2238 PetscCall(MatCreateVecs(A, NULL, &b)); 2239 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2240 PetscCall(VecDestroy(&b)); 2241 } 2242 break; 2243 } 2244 PetscMUMPS_c(mumps); 2245 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2246 2247 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2248 F->ops->solve = MatSolve_MUMPS; 2249 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2250 F->ops->matsolve = MatMatSolve_MUMPS; 2251 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2252 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2253 2254 mumps->matstruc = SAME_NONZERO_PATTERN; 2255 PetscFunctionReturn(PETSC_SUCCESS); 2256 } 2257 2258 /* Note the Petsc r and c permutations are ignored */ 2259 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2260 { 2261 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2262 Vec b; 2263 const PetscInt M = A->rmap->N; 2264 2265 PetscFunctionBegin; 2266 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2267 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2268 PetscFunctionReturn(PETSC_SUCCESS); 2269 } 2270 2271 /* Set MUMPS options from the options database */ 2272 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2273 2274 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2275 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2276 2277 /* analysis phase */ 2278 mumps->id.job = JOB_FACTSYMBOLIC; 2279 mumps->id.n = M; 2280 switch 
(mumps->id.ICNTL(18)) { 2281 case 0: /* centralized assembled matrix input */ 2282 if (!mumps->myid) { 2283 mumps->id.nnz = mumps->nnz; 2284 mumps->id.irn = mumps->irn; 2285 mumps->id.jcn = mumps->jcn; 2286 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2287 } 2288 break; 2289 case 3: /* distributed assembled matrix input (size>1) */ 2290 mumps->id.nnz_loc = mumps->nnz; 2291 mumps->id.irn_loc = mumps->irn; 2292 mumps->id.jcn_loc = mumps->jcn; 2293 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2294 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2295 PetscCall(MatCreateVecs(A, NULL, &b)); 2296 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2297 PetscCall(VecDestroy(&b)); 2298 } 2299 break; 2300 } 2301 PetscMUMPS_c(mumps); 2302 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2303 2304 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2305 F->ops->solve = MatSolve_MUMPS; 2306 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2307 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2308 2309 mumps->matstruc = SAME_NONZERO_PATTERN; 2310 PetscFunctionReturn(PETSC_SUCCESS); 2311 } 2312 2313 /* Note the Petsc r permutation and factor info are ignored */ 2314 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2315 { 2316 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2317 Vec b; 2318 const PetscInt M = A->rmap->N; 2319 2320 PetscFunctionBegin; 2321 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2322 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2323 PetscFunctionReturn(PETSC_SUCCESS); 2324 } 2325 2326 /* Set MUMPS options from the options database */ 2327 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2328 2329 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2330 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2331 
  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  /* symmetric factorization: solving with the transpose is the same solve */
  F->ops->solvetranspose    = MatSolve_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL; /* inertia is only meaningful for real symmetric matrices */
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  ASCII viewer for a MUMPS factor matrix: with -info/-info_detail formats, prints the
  ICNTL/CNTL control parameters, per-process RINFO/INFO statistics (synchronized
  printing), and the host's global RINFOG/INFOG statistics.
*/
PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
        if (mumps->id.ICNTL(11) > 0) {
          /* error-analysis statistics are only valid when ICNTL(11) requested them */
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(16-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7)));

        /* information local to each processor */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        /* user-requested per-process INFO entries (collected by -mat_mumps_view_info) */
        if (mumps->ninfo && mumps->ninfo <= 80) {
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Fill a MatInfo from MUMPS statistics. A negative INFOG(20) is the MUMPS convention
  for "absolute value is in millions", hence the -1000000 rescaling below.
*/
PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Register the index set `is` as the Schur complement block: allocate the dense
  SIZExSIZE Schur matrix F->schur and hand its storage to MUMPS (id.schur).
*/
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  /* MUMPS writes the Schur complement directly into the dense matrix storage */
  mumps->id.schur      = (MumpsScalar *)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1)
    PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); /* sym==1 means SPD input, so the Schur complement is SPD too */

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Copy the Schur complement computed by MUMPS (id.schur) into a new sequential dense
  matrix *S, expanding MUMPS's storage variants (row-major, column-major, or
  lower-triangular for symmetric factors, per ICNTL(19)) into a full matrix.
  Caller owns *S.
*/
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, to rebuild PetscScalar from MUMPS's (r,i) pairs */
#endif

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows: transpose into PETSc's column-major dense storage */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val;
        }
      }
    } else { /* stored by columns */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns: mirror into both triangles */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = val;
          array[j * N + i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = val;
          array[j * N + i] = val;
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Implementation of MatMumpsSetIcntl(). Before the MUMPS instance is initialized
  (id.job == JOB_NULL) the (icntl,ival) pairs are cached in ICNTL_pre, whose packed
  layout is { count, icntl_0, ival_0, icntl_1, ival_1, ... }; they are replayed once
  MUMPS is initialized. Afterwards the value is written straight into id.ICNTL().
*/
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached; grow the packed array by one (icntl,ival) pair */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMumpsGetIcntl(): read from the pre-init cache (0 if not cached)
   or from the live MUMPS structure. */
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
    *ival = 0;
    for (i = 0; i < nICNTL_pre; ++i) {
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
    }
  } else *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
. icntl - index of MUMPS parameter array ICNTL()
- ival - value of MUMPS ICNTL(icntl)

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
. ival - value of MUMPS ICNTL(icntl)

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidIntPointer(ival, 3);
  PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMumpsSetCntl(); same pre-initialization caching scheme as
   MatMumpsSetIcntl_MUMPS(), but CNTL_pre stores PetscReal pairs { count, icntl, val, ... }. */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
    if (i == nCNTL_pre) {
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMumpsGetCntl(): read from the pre-init cache (0.0 if not cached)
   or from the live MUMPS structure. */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    *val = 0.0;
    for (i = 0; i < nCNTL_pre; ++i) {
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
    }
  } else *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
. icntl - index of MUMPS parameter array CNTL()
- val - value of MUMPS CNTL(icntl)

  Options Database Key:
. -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
- icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
. val - value of MUMPS CNTL(icntl)

  Level: beginner

  References:
. * - MUMPS Users' Guide

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidRealPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Plain accessor for the local INFO() array (no pre-init caching; assumes MUMPS has run). */
PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Plain accessor for the global INFOG() array. */
PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Plain accessor for the local RINFO() array. */
PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Plain accessor for the global RINFOG() array. */
PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2874 { 2875 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2876 2877 PetscFunctionBegin; 2878 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2879 *size = 0; 2880 *array = NULL; 2881 if (!mumps->myid) { 2882 *size = mumps->id.INFOG(28); 2883 PetscCall(PetscMalloc1(*size, array)); 2884 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2885 } 2886 PetscFunctionReturn(PETSC_SUCCESS); 2887 } 2888 2889 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2890 { 2891 Mat Bt = NULL, Btseq = NULL; 2892 PetscBool flg; 2893 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2894 PetscScalar *aa; 2895 PetscInt spnr, *ia, *ja, M, nrhs; 2896 2897 PetscFunctionBegin; 2898 PetscValidPointer(spRHS, 2); 2899 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2900 if (flg) { 2901 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2902 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2903 2904 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2905 2906 if (mumps->petsc_size > 1) { 2907 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2908 Btseq = b->A; 2909 } else { 2910 Btseq = Bt; 2911 } 2912 2913 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2914 mumps->id.nrhs = nrhs; 2915 mumps->id.lrhs = M; 2916 mumps->id.rhs = NULL; 2917 2918 if (!mumps->myid) { 2919 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2920 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2921 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2922 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2923 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2924 } else { 2925 mumps->id.irhs_ptr = 
NULL; 2926 mumps->id.irhs_sparse = NULL; 2927 mumps->id.nz_rhs = 0; 2928 mumps->id.rhs_sparse = NULL; 2929 } 2930 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2931 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2932 2933 /* solve phase */ 2934 mumps->id.job = JOB_SOLVE; 2935 PetscMUMPS_c(mumps); 2936 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2)); 2937 2938 if (!mumps->myid) { 2939 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 2940 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2941 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2942 } 2943 PetscFunctionReturn(PETSC_SUCCESS); 2944 } 2945 2946 /*@ 2947 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 2948 2949 Logically Collective 2950 2951 Input Parameter: 2952 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2953 2954 Output Parameter: 2955 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 2956 2957 Level: beginner 2958 2959 References: 2960 . 
* - MUMPS Users' Guide 2961 2962 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 2963 @*/ 2964 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 2965 { 2966 PetscFunctionBegin; 2967 PetscValidType(F, 1); 2968 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2969 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 2970 PetscFunctionReturn(PETSC_SUCCESS); 2971 } 2972 2973 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 2974 { 2975 Mat spRHS; 2976 2977 PetscFunctionBegin; 2978 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 2979 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 2980 PetscCall(MatDestroy(&spRHS)); 2981 PetscFunctionReturn(PETSC_SUCCESS); 2982 } 2983 2984 /*@ 2985 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 2986 2987 Logically Collective 2988 2989 Input Parameter: 2990 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2991 2992 Output Parameter: 2993 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 2994 2995 Level: beginner 2996 2997 References: 2998 . 
* - MUMPS Users' Guide 2999 3000 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3001 @*/ 3002 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3003 { 3004 PetscBool flg; 3005 3006 PetscFunctionBegin; 3007 PetscValidType(F, 1); 3008 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3009 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3010 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3011 3012 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3013 PetscFunctionReturn(PETSC_SUCCESS); 3014 } 3015 3016 /*@ 3017 MatMumpsGetInfo - Get MUMPS parameter INFO() 3018 3019 Logically Collective 3020 3021 Input Parameters: 3022 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3023 - icntl - index of MUMPS parameter array INFO() 3024 3025 Output Parameter: 3026 . ival - value of MUMPS INFO(icntl) 3027 3028 Level: beginner 3029 3030 References: 3031 . 
* - MUMPS Users' Guide 3032 3033 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3034 @*/ 3035 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3036 { 3037 PetscFunctionBegin; 3038 PetscValidType(F, 1); 3039 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3040 PetscValidIntPointer(ival, 3); 3041 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3042 PetscFunctionReturn(PETSC_SUCCESS); 3043 } 3044 3045 /*@ 3046 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3047 3048 Logically Collective 3049 3050 Input Parameters: 3051 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3052 - icntl - index of MUMPS parameter array INFOG() 3053 3054 Output Parameter: 3055 . ival - value of MUMPS INFOG(icntl) 3056 3057 Level: beginner 3058 3059 References: 3060 . 
* - MUMPS Users' Guide 3061 3062 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3063 @*/ 3064 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3065 { 3066 PetscFunctionBegin; 3067 PetscValidType(F, 1); 3068 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3069 PetscValidIntPointer(ival, 3); 3070 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3071 PetscFunctionReturn(PETSC_SUCCESS); 3072 } 3073 3074 /*@ 3075 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3076 3077 Logically Collective 3078 3079 Input Parameters: 3080 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3081 - icntl - index of MUMPS parameter array RINFO() 3082 3083 Output Parameter: 3084 . val - value of MUMPS RINFO(icntl) 3085 3086 Level: beginner 3087 3088 References: 3089 . 
* - MUMPS Users' Guide 3090 3091 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3092 @*/ 3093 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3094 { 3095 PetscFunctionBegin; 3096 PetscValidType(F, 1); 3097 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3098 PetscValidRealPointer(val, 3); 3099 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3100 PetscFunctionReturn(PETSC_SUCCESS); 3101 } 3102 3103 /*@ 3104 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3105 3106 Logically Collective 3107 3108 Input Parameters: 3109 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3110 - icntl - index of MUMPS parameter array RINFOG() 3111 3112 Output Parameter: 3113 . val - value of MUMPS RINFOG(icntl) 3114 3115 Level: beginner 3116 3117 References: 3118 . * - MUMPS Users' Guide 3119 3120 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3121 @*/ 3122 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3123 { 3124 PetscFunctionBegin; 3125 PetscValidType(F, 1); 3126 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3127 PetscValidRealPointer(val, 3); 3128 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3129 PetscFunctionReturn(PETSC_SUCCESS); 3130 } 3131 3132 /*@ 3133 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3134 3135 Logically Collective 3136 3137 Input Parameter: 3138 . 
F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3139 3140 Output Parameters: 3141 + size - local size of the array. The size of the array is non-zero only on the host. 3142 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3143 for freeing this array. 3144 3145 Level: beginner 3146 3147 References: 3148 . * - MUMPS Users' Guide 3149 3150 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3151 @*/ 3152 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3153 { 3154 PetscFunctionBegin; 3155 PetscValidType(F, 1); 3156 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3157 PetscValidIntPointer(size, 3); 3158 PetscValidPointer(array, 4); 3159 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3160 PetscFunctionReturn(PETSC_SUCCESS); 3161 } 3162 3163 /*MC 3164 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3165 distributed and sequential matrices via the external package MUMPS. 3166 3167 Works with `MATAIJ` and `MATSBAIJ` matrices 3168 3169 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3170 3171 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3172 See details below. 3173 3174 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3175 3176 Options Database Keys: 3177 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3178 . 
-mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3179 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3180 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3181 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3182 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3183 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3184 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3185 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3186 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3187 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3188 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3189 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3190 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3191 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3192 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3193 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3194 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3195 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3196 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3197 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3198 . 
-mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
  error if the matrix is Hermitian.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3223 3224 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3225 the failure with 3226 .vb 3227 KSPGetPC(ksp,&pc); 3228 PCFactorGetMatrix(pc,&mat); 3229 MatMumpsGetInfo(mat,....); 3230 MatMumpsGetInfog(mat,....); etc. 3231 .ve 3232 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3233 3234 MUMPS provides 64-bit integer support in two build modes: 3235 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3236 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3237 3238 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3239 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3240 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3241 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3242 3243 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3244 3245 Two modes to run MUMPS/PETSc with OpenMP 3246 .vb 3247 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3248 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3249 .ve 3250 3251 .vb 3252 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. 
For example, 3253 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3254 .ve 3255 3256 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3257 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3258 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3259 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3260 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 3261 3262 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3263 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3264 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3265 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3266 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3267 In a multi-socket compute node, MPI rank mapping is an issue. 
Still use the above example and suppose your compute node has two sockets, 3268 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3269 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3270 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3271 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3272 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3273 examine the mapping result. 3274 3275 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3276 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3277 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3278 3279 References: 3280 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3281 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
3282 3283 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3284 M*/ 3285 3286 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3287 { 3288 PetscFunctionBegin; 3289 *type = MATSOLVERMUMPS; 3290 PetscFunctionReturn(PETSC_SUCCESS); 3291 } 3292 3293 /* MatGetFactor for Seq and MPI AIJ matrices */ 3294 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3295 { 3296 Mat B; 3297 Mat_MUMPS *mumps; 3298 PetscBool isSeqAIJ; 3299 PetscMPIInt size; 3300 3301 PetscFunctionBegin; 3302 #if defined(PETSC_USE_COMPLEX) 3303 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3304 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3305 *F = NULL; 3306 PetscFunctionReturn(PETSC_SUCCESS); 3307 } 3308 #endif 3309 /* Create the factorization matrix */ 3310 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3311 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3312 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3313 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3314 PetscCall(MatSetUp(B)); 3315 3316 PetscCall(PetscNew(&mumps)); 3317 3318 B->ops->view = MatView_MUMPS; 3319 B->ops->getinfo = MatGetInfo_MUMPS; 3320 3321 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3322 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3323 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3324 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3325 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3326 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3327 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3328 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3329 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3330 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3331 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3332 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3333 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3334 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3335 3336 if (ftype == MAT_FACTOR_LU) { 3337 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3338 B->factortype = MAT_FACTOR_LU; 3339 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3340 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3341 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3342 mumps->sym = 0; 3343 } else { 3344 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3345 B->factortype = MAT_FACTOR_CHOLESKY; 3346 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3347 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3348 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3349 
#if defined(PETSC_USE_COMPLEX) 3350 mumps->sym = 2; 3351 #else 3352 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3353 else mumps->sym = 2; 3354 #endif 3355 } 3356 3357 /* set solvertype */ 3358 PetscCall(PetscFree(B->solvertype)); 3359 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3360 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3361 if (size == 1) { 3362 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3363 B->canuseordering = PETSC_TRUE; 3364 } 3365 B->ops->destroy = MatDestroy_MUMPS; 3366 B->data = (void *)mumps; 3367 3368 *F = B; 3369 mumps->id.job = JOB_NULL; 3370 mumps->ICNTL_pre = NULL; 3371 mumps->CNTL_pre = NULL; 3372 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3373 PetscFunctionReturn(PETSC_SUCCESS); 3374 } 3375 3376 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3377 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3378 { 3379 Mat B; 3380 Mat_MUMPS *mumps; 3381 PetscBool isSeqSBAIJ; 3382 PetscMPIInt size; 3383 3384 PetscFunctionBegin; 3385 #if defined(PETSC_USE_COMPLEX) 3386 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3387 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3388 *F = NULL; 3389 PetscFunctionReturn(PETSC_SUCCESS); 3390 } 3391 #endif 3392 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3393 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3394 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3395 PetscCall(MatSetUp(B)); 3396 3397 PetscCall(PetscNew(&mumps)); 3398 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3399 if (isSeqSBAIJ) { 3400 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3401 } else { 3402 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3403 } 3404 3405 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3406 B->ops->view = MatView_MUMPS; 3407 B->ops->getinfo = MatGetInfo_MUMPS; 3408 3409 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3410 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3411 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3412 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3414 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3417 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3418 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3419 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", 
MatMumpsGetRinfog_MUMPS)); 3420 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3421 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3422 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3423 3424 B->factortype = MAT_FACTOR_CHOLESKY; 3425 #if defined(PETSC_USE_COMPLEX) 3426 mumps->sym = 2; 3427 #else 3428 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3429 else mumps->sym = 2; 3430 #endif 3431 3432 /* set solvertype */ 3433 PetscCall(PetscFree(B->solvertype)); 3434 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3435 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3436 if (size == 1) { 3437 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3438 B->canuseordering = PETSC_TRUE; 3439 } 3440 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3441 B->ops->destroy = MatDestroy_MUMPS; 3442 B->data = (void *)mumps; 3443 3444 *F = B; 3445 mumps->id.job = JOB_NULL; 3446 mumps->ICNTL_pre = NULL; 3447 mumps->CNTL_pre = NULL; 3448 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3449 PetscFunctionReturn(PETSC_SUCCESS); 3450 } 3451 3452 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3453 { 3454 Mat B; 3455 Mat_MUMPS *mumps; 3456 PetscBool isSeqBAIJ; 3457 PetscMPIInt size; 3458 3459 PetscFunctionBegin; 3460 /* Create the factorization matrix */ 3461 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3462 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3463 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3464 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3465 PetscCall(MatSetUp(B)); 3466 3467 PetscCall(PetscNew(&mumps)); 3468 if 
(ftype == MAT_FACTOR_LU) { 3469 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3470 B->factortype = MAT_FACTOR_LU; 3471 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3472 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3473 mumps->sym = 0; 3474 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3475 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3476 3477 B->ops->view = MatView_MUMPS; 3478 B->ops->getinfo = MatGetInfo_MUMPS; 3479 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3492 
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  /* return the factor matrix; MUMPS itself is initialized lazily (id.job stays JOB_NULL here) */
  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetFactor for Seq and MPI SELL matrices; currently only sequential SELL with LU is implemented */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqSELL;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose solver-specific callbacks (note: the MatMumpsGetInverse* pair is not provided for SELL) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* SELL entries are converted to AIJ-style triples for MUMPS; MPI SELL is not yet supported */
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric factorization */
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;
3568 *F = B; 3569 mumps->id.job = JOB_NULL; 3570 mumps->ICNTL_pre = NULL; 3571 mumps->CNTL_pre = NULL; 3572 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3573 PetscFunctionReturn(PETSC_SUCCESS); 3574 } 3575 3576 /* MatGetFactor for MATNEST matrices */ 3577 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3578 { 3579 Mat B, **mats; 3580 Mat_MUMPS *mumps; 3581 PetscInt nr, nc; 3582 PetscMPIInt size; 3583 PetscBool flg = PETSC_TRUE; 3584 3585 PetscFunctionBegin; 3586 #if defined(PETSC_USE_COMPLEX) 3587 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3588 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3589 *F = NULL; 3590 PetscFunctionReturn(PETSC_SUCCESS); 3591 } 3592 #endif 3593 3594 /* Return if some condition is not satisfied */ 3595 *F = NULL; 3596 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3597 if (ftype == MAT_FACTOR_CHOLESKY) { 3598 IS *rows, *cols; 3599 PetscInt *m, *M; 3600 3601 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3602 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3603 PetscCall(MatNestGetISs(A, rows, cols)); 3604 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3605 if (!flg) { 3606 PetscCall(PetscFree2(rows, cols)); 3607 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. 
Use MAT_FACTOR_LU.\n")); 3608 PetscFunctionReturn(PETSC_SUCCESS); 3609 } 3610 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3611 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3612 for (PetscInt r = 0; flg && r < nr; r++) 3613 for (PetscInt k = r + 1; flg && k < nr; k++) 3614 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3615 PetscCall(PetscFree2(m, M)); 3616 PetscCall(PetscFree2(rows, cols)); 3617 if (!flg) { 3618 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3619 PetscFunctionReturn(PETSC_SUCCESS); 3620 } 3621 } 3622 3623 for (PetscInt r = 0; r < nr; r++) { 3624 for (PetscInt c = 0; c < nc; c++) { 3625 Mat sub = mats[r][c]; 3626 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans; 3627 3628 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3629 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 3630 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 3631 else { 3632 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans)); 3633 if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 3634 } 3635 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3636 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3637 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3638 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3639 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3640 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3641 if (ftype == MAT_FACTOR_CHOLESKY) { 3642 if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) { 3643 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", 
((PetscObject)sub)->type_name)); 3644 flg = PETSC_FALSE; 3645 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3646 PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3647 flg = PETSC_FALSE; 3648 } 3649 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) { 3650 PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3651 flg = PETSC_FALSE; 3652 } 3653 } 3654 } 3655 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3656 3657 /* Create the factorization matrix */ 3658 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3659 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3660 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3661 PetscCall(MatSetUp(B)); 3662 3663 PetscCall(PetscNew(&mumps)); 3664 3665 B->ops->view = MatView_MUMPS; 3666 B->ops->getinfo = MatGetInfo_MUMPS; 3667 3668 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3669 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3670 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3671 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3672 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3673 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3674 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3675 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3676 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric factorization */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* MUMPS SYM parameter: 1 = symmetric positive definite, 2 = general symmetric.
       With complex scalars the SPD (sym=1) path is never used here */
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  /* the nest matrix is flattened into xAIJ-style triples for MUMPS */
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  /* return the factor matrix; MUMPS itself is initialized lazily (id.job stays JOB_NULL here) */
  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Register the MUMPS factorization backends with PETSc's solver-type registry:
   LU and Cholesky for AIJ, LU and Cholesky for BAIJ (Cholesky errors out at MatGetFactor
   time), Cholesky for SBAIJ, LU for sequential SELL, and LU/Cholesky for MATNEST */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}