/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   the number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}
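
/*
   Illustrative snippet only (not part of this file's logic): a hypothetical call site casting a
   PetscInt before handing it to MUMPS. In 64-bit-index builds the cast errors out if the value
   does not fit in a MUMPS_INT.

     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(A->rmap->N, &n));
     mumps->id.n = n;
*/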

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

/* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
           to each process, so we only Bcast info[1], an error code, and leave the others alone (they do not have \
           an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual p. 82. \
           omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
        */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MumpsScalar mumps_complex
  #else
    #define MumpsScalar mumps_double_complex
  #endif
#else
  #define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match the MUMPS documentation */
#define ICNTL(I)  icntl[(I) - 1]
#define CNTL(I)   cntl[(I) - 1]
#define INFOG(I)  infog[(I) - 1]
#define INFO(I)   info[(I) - 1]
#define RINFOG(I) rinfog[(I) - 1]
#define RINFO(I)  rinfo[(I) - 1]
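
/*
   Illustrative snippet only: these macros let the rest of the file use the 1-based numbering of the
   MUMPS documentation while the struct members are 0-based C arrays, e.g.

     mumps->id.ICNTL(7) = 5;   // expands to mumps->id.icntl[6] = 5, i.e. ICNTL(7) in the MUMPS manual
*/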

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to MUMPS */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in MUMPS */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from the previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse RHS */
  PetscCount     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices; use PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return the result in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate the MUMPS internal array that stores the reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij format
    shift - 0: C-style output triples; 1: Fortran-style output triples
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and column indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(); in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
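
/*
   Illustrative example only: with shift = 1 (Fortran-style indexing, as MUMPS expects), the 2x2 AIJ matrix

     [ 2.0  0.0 ]
     [ 3.0  4.0 ]

   is converted to the triples

     irn = {1, 2, 2},  jcn = {1, 1, 2},  val = {2.0, 3.0, 4.0}
*/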

static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscCount         nz, rnz, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (PetscCount i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (PetscCount j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
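
/*
   Illustrative note only: a BAIJ matrix with block size bs stores bs*bs dense blocks, so each stored
   block above expands into bs*bs triples. For bs = 2, one block at block-row i and block-column ajj[k]
   produces the entries (2i, 2*ajj[k]), (2i+1, 2*ajj[k]), (2i, 2*ajj[k]+1), (2i+1, 2*ajj[k]+1)
   (plus the index shift), in the column-major order of the loops above.
*/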

static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscCount      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscCount alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                          /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}
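
/*
   Illustrative note only: for symmetric (Cholesky/LDL^T) factorizations MUMPS expects only one triangle of
   the matrix, so the symmetric converters keep only entries with column >= row (block-wise: ajj[j] > i for
   off-diagonal blocks, or k >= m inside a diagonal block). For example, the symmetric matrix

     [ 4  1 ]
     [ 1  3 ]

   is passed as the three triples (1,1,4), (1,2,1), (2,2,3) when shift = 1.
*/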

static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscCount         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ storage */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)Ad->data;
  bb    = (Mat_SeqAIJ *)Ao->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of the upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscCount         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v;
    else {
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
// MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
// and its rows and columns permuted
// TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
{
  Mat         A;
  PetscScalar s[2];
  PetscBool   isTrans, isHTrans, compare;

  PetscFunctionBegin;
  do {
    PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
    if (isTrans) {
      PetscCall(MatTransposeGetMat(*sub, &A));
      isHTrans = PETSC_FALSE;
    } else {
      PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
      if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
    }
    compare = (PetscBool)(isTrans || isHTrans);
    if (compare) {
      if (vshift && vscale) {
        PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
        if (!*conjugate) {
          *vshift += s[0] * *vscale;
          *vscale *= s[1];
        } else {
          *vshift += PetscConj(s[0]) * *vscale;
          *vscale *= PetscConj(s[1]);
        }
      }
      if (swap) *swap = (PetscBool)!*swap;
      if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
      *sub = A;
    }
  } while (compare);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w, Mbs = 0;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular blocks for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate the total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle the row and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst, size, bs;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (c == r) {
          PetscCall(ISGetSize(rows[r], &size));
          if (!mumps->nest_convert_to_triples[r * nc + c]) {
            for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
          }
          PetscCall(MatGetBlockSize(sub, &bs));
          Mbs += size / bs;
        }
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get the column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get the row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps data structure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the roles of the row and column indices for transposed blocks
           since we need values with the global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64-bit integer builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use the direct map for rows and the mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values into the full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift the new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (mumps->id.ICNTL(15) == 1) {
      if (Mbs != A->rmap->N) {
        PetscMPIInt rank, size;

        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        if (rank == 0) {
          PetscInt shift = 0;

          PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
          PetscCall(PetscFree(mumps->id.blkptr));
          PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
          mumps->id.blkptr[0] = 1;
          for (PetscInt i = 0; i < size; ++i) {
            for (PetscInt r = 0; r < nr; r++) {
              Mat             sub = mats[r][r];
              const PetscInt *ranges;
              PetscInt        bs;

              for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
              PetscCall(MatGetOwnershipRanges(sub, &ranges));
              PetscCall(MatGetBlockSize(sub, &bs));
              for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
              shift += (ranges[i + 1] - ranges[i]) / bs;
            }
          }
        }
      } else mumps->id.ICNTL(15) = 0;
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set the pointers for the final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
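
/*
   Illustrative note only: for a 2x2 MATNEST A = [[A00 A01] [NULL A11]], each block is first converted to
   local (C-style) triples above and then mapped to the global numbering of A through the index sets
   returned by MatNestGetISs(). For instance, a triple (i, j, v) of block (0,1) becomes roughly
   (rows_idx[0][i - rst], cols_idx[1][j], v), where rst is the block's local row-ownership start,
   before being appended to the single COO list handed to MUMPS.
*/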

static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree(mumps->id.blkvar));
  PetscCall(PetscFree(mumps->id.blkptr));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp data for the distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc the buffers when the current nrhs is bigger than any we have met so far */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Set up recvcounts[], disps[] and recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf; a single rhs is the common case in Ax=b */
Single rhs is common in Ax=b */ 1476 } 1477 1478 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1479 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1480 1481 if (mumps->is_omp_master) { 1482 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1483 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1484 for (j = 0; j < ompsize; j++) { 1485 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1486 dst = dstbase; 1487 for (i = 0; i < nrhs; i++) { 1488 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1489 src += mumps->rhs_nrow[j]; 1490 dst += mumps->nloc_rhs; 1491 } 1492 dstbase += mumps->rhs_nrow[j]; 1493 } 1494 } 1495 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1496 } 1497 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1498 } 1499 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1500 mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 1501 mumps->id.lrhs_loc = mumps->nloc_rhs; 1502 mumps->id.irhs_loc = mumps->irhs_loc; 1503 PetscFunctionReturn(PETSC_SUCCESS); 1504 } 1505 1506 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1507 { 1508 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1509 const PetscScalar *rarray = NULL; 1510 PetscScalar *array; 1511 IS is_iden, is_petsc; 1512 PetscInt i; 1513 PetscBool second_solve = PETSC_FALSE; 1514 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1515 1516 PetscFunctionBegin; 1517 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1518 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1519 &cite1)); 1520 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1521 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1522 &cite2)); 1523 1524 PetscCall(VecFlag(x, A->factorerrortype)); 1525 if (A->factorerrortype) { 1526 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1527 PetscFunctionReturn(PETSC_SUCCESS); 1528 } 1529 1530 mumps->id.nrhs = 1; 1531 if (mumps->petsc_size > 1) { 1532 if (mumps->ICNTL20 == 10) { 1533 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1534 PetscCall(VecGetArrayRead(b, &rarray)); 1535 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1536 } else { 1537 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1538 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1539 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1540 if (!mumps->myid) { 1541 PetscCall(VecGetArray(mumps->b_seq, &array)); 1542 mumps->id.rhs = (MumpsScalar *)array; 1543 } 1544 } 1545 } else { /* petsc_size == 1 */ 1546 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1547 PetscCall(VecCopy(b, x)); 1548 PetscCall(VecGetArray(x, &array)); 1549 mumps->id.rhs = (MumpsScalar *)array; 1550 } 1551 1552 /* 1553 handle condensation step of Schur complement (if any) 1554 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1555 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1556 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
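     (In that case the code below sets second_solve = PETSC_TRUE, calls MatMumpsHandleSchur_Private() before the solve,
     runs the solve phase with ICNTL(26) = 1 so that the right-hand side is condensed on the Schur variables, and then
     calls MatMumpsHandleSchur_Private() again to expand the condensed solution back to the full vector.)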
1557 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1558 */ 1559 if (mumps->id.size_schur > 0) { 1560 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1561 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1562 second_solve = PETSC_TRUE; 1563 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1564 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1565 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1566 } 1567 /* solve phase */ 1568 mumps->id.job = JOB_SOLVE; 1569 PetscMUMPS_c(mumps); 1570 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1571 1572 /* handle expansion step of Schur complement (if any) */ 1573 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1574 else if (mumps->id.ICNTL(26) == 1) { 1575 PetscCall(MatMumpsSolveSchur_Private(A)); 1576 for (i = 0; i < mumps->id.size_schur; ++i) { 1577 #if !defined(PETSC_USE_COMPLEX) 1578 PetscScalar val = mumps->id.redrhs[i]; 1579 #else 1580 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1581 #endif 1582 array[mumps->id.listvar_schur[i] - 1] = val; 1583 } 1584 } 1585 1586 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1587 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1588 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1589 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1590 } 1591 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1592 PetscInt *isol2_loc = NULL; 1593 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1594 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1595 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1596 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1597 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1598 PetscCall(ISDestroy(&is_iden)); 1599 PetscCall(ISDestroy(&is_petsc)); 1600 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1601 } 1602 1603 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1604 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1605 } 1606 1607 if (mumps->petsc_size > 1) { 1608 if (mumps->ICNTL20 == 10) { 1609 PetscCall(VecRestoreArrayRead(b, &rarray)); 1610 } else if (!mumps->myid) { 1611 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1612 } 1613 } else PetscCall(VecRestoreArray(x, &array)); 1614 1615 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1616 PetscFunctionReturn(PETSC_SUCCESS); 1617 } 1618 1619 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1620 { 1621 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1622 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1623 1624 PetscFunctionBegin; 1625 mumps->id.ICNTL(9) = 0; 1626 PetscCall(MatSolve_MUMPS(A, b, x)); 1627 mumps->id.ICNTL(9) = value; 1628 PetscFunctionReturn(PETSC_SUCCESS); 1629 } 1630 1631 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1632 { 1633 Mat Bt = NULL; 1634 PetscBool denseX, denseB, flg, flgT; 1635 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1636 PetscInt i, nrhs, M, nrhsM; 1637 PetscScalar *array; 1638 const PetscScalar *rbray; 1639 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1640 PetscMUMPSInt *isol_loc, *isol_loc_save; 1641 PetscScalar *bray, *sol_loc, *sol_loc_save; 1642 IS is_to, is_from; 1643 PetscInt k, proc, j, m, myrstart; 1644 const PetscInt *rstart; 1645 Vec v_mpi, msol_loc; 1646 VecScatter scat_sol; 1647 Vec b_seq; 1648 VecScatter scat_rhs; 1649 PetscScalar *aa; 1650 PetscInt spnr, *ia, *ja; 1651 Mat_MPIAIJ *b = NULL; 1652 1653 PetscFunctionBegin; 1654 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1655 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1656 1657 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1658 if (denseB) { 1659 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1660 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1661 } else { /* sparse B */ 1662 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1663 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1664 PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1665 PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1666 /* input B is transpose of actual RHS matrix, 1667 because mumps requires sparse compressed COLUMN storage! 
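     A caller therefore supplies the transpose of the sparse RHS wrapped as a MATTRANSPOSEVIRTUAL matrix;
     MatTransposeGetMat() below exposes the underlying AIJ matrix Bt, whose CSR rows are exactly the compressed
     columns of B, so no explicit transposition is ever formed.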
See MatMatTransposeSolve_MUMPS() */ 1668 PetscCall(MatTransposeGetMat(B, &Bt)); 1669 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1670 } 1671 1672 PetscCall(MatGetSize(B, &M, &nrhs)); 1673 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 1674 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1675 mumps->id.lrhs = (PetscMUMPSInt)M; 1676 mumps->id.rhs = NULL; 1677 1678 if (mumps->petsc_size == 1) { 1679 PetscScalar *aa; 1680 PetscInt spnr, *ia, *ja; 1681 PetscBool second_solve = PETSC_FALSE; 1682 1683 PetscCall(MatDenseGetArray(X, &array)); 1684 mumps->id.rhs = (MumpsScalar *)array; 1685 1686 if (denseB) { 1687 /* copy B to X */ 1688 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1689 PetscCall(PetscArraycpy(array, rbray, nrhsM)); 1690 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1691 } else { /* sparse B */ 1692 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1693 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1694 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1695 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1696 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1697 } 1698 /* handle condensation step of Schur complement (if any) */ 1699 if (mumps->id.size_schur > 0) { 1700 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1701 second_solve = PETSC_TRUE; 1702 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1703 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1704 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1705 } 1706 /* solve phase */ 1707 mumps->id.job = JOB_SOLVE; 1708 PetscMUMPS_c(mumps); 1709 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1710 1711 /* handle expansion step of Schur complement (if any) */ 1712 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1713 else if (mumps->id.ICNTL(26) == 1) { 1714 PetscCall(MatMumpsSolveSchur_Private(A)); 1715 for (j = 0; j < nrhs; ++j) 1716 for (i = 0; i < mumps->id.size_schur; ++i) { 1717 #if !defined(PETSC_USE_COMPLEX) 1718 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1719 #else 1720 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1721 #endif 1722 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1723 } 1724 } 1725 if (!denseB) { /* sparse B */ 1726 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1727 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1728 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1729 } 1730 PetscCall(MatDenseRestoreArray(X, &array)); 1731 PetscFunctionReturn(PETSC_SUCCESS); 1732 } 1733 1734 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
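     More precisely: with ICNTL(20)=10 a dense B stays distributed and only its local array is handed to
     MatMumpsSetUpDistRHSInfo(); with ICNTL(20)=0 a dense B is gathered onto rank 0 through the scat_rhs
     vecscatter built below; a sparse right-hand side is passed to MUMPS from rank 0 only (irhs_ptr/irhs_sparse).
     In all cases the solution comes back distributed in sol_loc/isol_loc and is scattered into X afterwards.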
*/ 1735 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1736 1737 /* create msol_loc to hold mumps local solution */ 1738 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1739 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1740 1741 lsol_loc = mumps->id.lsol_loc; 1742 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 1743 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1744 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1745 mumps->id.isol_loc = isol_loc; 1746 1747 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1748 1749 if (denseB) { 1750 if (mumps->ICNTL20 == 10) { 1751 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1752 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1753 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1754 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1755 PetscCall(MatGetLocalSize(B, &m, NULL)); 1756 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); 1757 } else { 1758 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1759 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1760 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1761 0, re-arrange B into desired order, which is a local operation. 1762 */ 1763 1764 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1765 /* wrap dense rhs matrix B into a vector v_mpi */ 1766 PetscCall(MatGetLocalSize(B, &m, NULL)); 1767 PetscCall(MatDenseGetArray(B, &bray)); 1768 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1769 PetscCall(MatDenseRestoreArray(B, &bray)); 1770 1771 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
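     The index map built below places the rows owned by each rank contiguously, one column after another, into the
     column-major centralized array. Illustrative example (hypothetical sizes): M = 4, nrhs = 2, two ranks owning
     rows {0,1} and {2,3}. v_mpi is ordered [rank0 col0 | rank0 col1 | rank1 col0 | rank1 col1], so
     idx = [0, 1, 4, 5, 2, 3, 6, 7], i.e. entry k of v_mpi lands at column-major position idx[k] = j*M + i of B.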
*/ 1772 if (!mumps->myid) { 1773 PetscInt *idx; 1774 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1775 PetscCall(PetscMalloc1(nrhsM, &idx)); 1776 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1777 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 1778 for (j = 0; j < nrhs; j++) { 1779 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1780 } 1781 } 1782 1783 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 1784 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 1785 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 1786 } else { 1787 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1788 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1789 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1790 } 1791 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1792 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1793 PetscCall(ISDestroy(&is_to)); 1794 PetscCall(ISDestroy(&is_from)); 1795 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1796 1797 if (!mumps->myid) { /* define rhs on the host */ 1798 PetscCall(VecGetArray(b_seq, &bray)); 1799 mumps->id.rhs = (MumpsScalar *)bray; 1800 PetscCall(VecRestoreArray(b_seq, &bray)); 1801 } 1802 } 1803 } else { /* sparse B */ 1804 b = (Mat_MPIAIJ *)Bt->data; 1805 1806 /* wrap dense X into a vector v_mpi */ 1807 PetscCall(MatGetLocalSize(X, &m, NULL)); 1808 PetscCall(MatDenseGetArray(X, &bray)); 1809 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi)); 1810 PetscCall(MatDenseRestoreArray(X, &bray)); 1811 1812 if (!mumps->myid) { 1813 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1814 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1815 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1816 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1817 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1818 } else { 1819 mumps->id.irhs_ptr = NULL; 1820 mumps->id.irhs_sparse = NULL; 1821 mumps->id.nz_rhs = 0; 1822 mumps->id.rhs_sparse = NULL; 1823 } 1824 } 1825 1826 /* solve phase */ 1827 mumps->id.job = JOB_SOLVE; 1828 PetscMUMPS_c(mumps); 1829 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1830 1831 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 1832 PetscCall(MatDenseGetArray(X, &array)); 1833 PetscCall(VecPlaceArray(v_mpi, array)); 1834 1835 /* create scatter scat_sol */ 1836 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1837 /* iidx: index for scatter mumps solution to PETSc X */ 1838 1839 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1840 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1841 for (i = 0; i < lsol_loc; i++) { 1842 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1843 1844 for (proc = 0; proc < mumps->petsc_size; proc++) { 1845 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1846 myrstart = rstart[proc]; 1847 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 1848 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 1849 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1850 break; 1851 } 1852 } 1853 1854 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1855 } 1856 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1857 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1858 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1859 PetscCall(ISDestroy(&is_from)); 1860 PetscCall(ISDestroy(&is_to)); 1861 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1862 PetscCall(MatDenseRestoreArray(X, &array)); 1863 1864 /* free spaces */ 1865 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1866 mumps->id.isol_loc = isol_loc_save; 1867 1868 PetscCall(PetscFree2(sol_loc, isol_loc)); 1869 PetscCall(PetscFree(idxx)); 1870 PetscCall(VecDestroy(&msol_loc)); 1871 PetscCall(VecDestroy(&v_mpi)); 1872 if (!denseB) { 1873 if (!mumps->myid) { 1874 b = (Mat_MPIAIJ *)Bt->data; 1875 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1876 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1877 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1878 } 1879 } else { 1880 if (mumps->ICNTL20 == 0) { 1881 PetscCall(VecDestroy(&b_seq)); 1882 PetscCall(VecScatterDestroy(&scat_rhs)); 1883 } 1884 } 1885 PetscCall(VecScatterDestroy(&scat_sol)); 1886 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1887 PetscFunctionReturn(PETSC_SUCCESS); 1888 } 1889 1890 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1891 { 1892 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1893 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1894 1895 PetscFunctionBegin; 1896 mumps->id.ICNTL(9) = 0; 1897 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1898 mumps->id.ICNTL(9) = value; 1899 PetscFunctionReturn(PETSC_SUCCESS); 1900 } 1901 1902 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1903 { 1904 PetscBool flg; 1905 Mat B; 1906 1907 PetscFunctionBegin; 1908 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1909 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1910 1911 /* Create B=Bt^T that uses Bt's data structure */ 1912 PetscCall(MatCreateTranspose(Bt, &B)); 1913 1914 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1915 PetscCall(MatDestroy(&B)); 1916 PetscFunctionReturn(PETSC_SUCCESS); 1917 } 1918 1919 #if !defined(PETSC_USE_COMPLEX) 1920 /* 1921 input: 1922 F: numeric factor 1923 output: 1924 nneg: total number of negative pivots 1925 nzero: total number of zero pivots 1926 npos: (global dimension of F) - nneg - nzero 1927 */ 1928 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1929 { 1930 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1931 PetscMPIInt size; 1932 1933 PetscFunctionBegin; 1934 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1935 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1936 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 1937 1938 if (nneg) *nneg = mumps->id.INFOG(12); 1939 if (nzero || npos) { 1940 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1941 if (nzero) *nzero = mumps->id.INFOG(28); 1942 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1943 } 1944 PetscFunctionReturn(PETSC_SUCCESS); 1945 } 1946 #endif 1947 1948 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1949 { 1950 PetscMPIInt nreqs; 1951 PetscMUMPSInt *irn, *jcn; 1952 PetscMPIInt count; 1953 PetscCount totnnz, remain; 1954 const PetscInt osize = mumps->omp_comm_size; 1955 PetscScalar *val; 1956 1957 PetscFunctionBegin; 1958 if (osize > 1) { 1959 if (reuse == MAT_INITIAL_MATRIX) { 1960 /* master first gathers counts of nonzeros to receive */ 1961 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1962 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1963 1964 /* Then each computes number of send/recvs */ 1965 if (mumps->is_omp_master) { 1966 /* Start from 1 since self communication is not done in MPI */ 1967 nreqs = 0; 1968 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1969 } else { 1970 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 1971 } 1972 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1973 1974 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1975 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1976 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1977 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
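     Illustrative example (hypothetical counts): if recvcount[i] = 5e9 and PETSC_MPI_INT_MAX = 2^31-1, the data of
     rank i arrives in three chunks of 2147483647, 2147483647 and 705032706 entries; each chunk gets its own
     nonblocking call below (three per chunk for irn/jcn/val at MAT_INITIAL_MATRIX, one per chunk for val at
     MAT_REUSE_MATRIX), all chunks share one tag, and the matching MPI_Waitall is issued after the loops.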
1978 */ 1979 nreqs = 0; /* counter for actual send/recvs */ 1980 if (mumps->is_omp_master) { 1981 totnnz = 0; 1982 1983 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1984 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1985 PetscCall(PetscMalloc1(totnnz, &val)); 1986 1987 /* Self communication */ 1988 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1989 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1990 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1991 1992 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1993 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1994 PetscCall(PetscFree(mumps->val_alloc)); 1995 mumps->nnz = totnnz; 1996 mumps->irn = irn; 1997 mumps->jcn = jcn; 1998 mumps->val = mumps->val_alloc = val; 1999 2000 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 2001 jcn += mumps->recvcount[0]; 2002 val += mumps->recvcount[0]; 2003 2004 /* Remote communication */ 2005 for (PetscMPIInt i = 1; i < osize; i++) { 2006 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2007 remain = mumps->recvcount[i] - count; 2008 while (count > 0) { 2009 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2010 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2011 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2012 irn += count; 2013 jcn += count; 2014 val += count; 2015 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2016 remain -= count; 2017 } 2018 } 2019 } else { 2020 irn = mumps->irn; 2021 jcn = mumps->jcn; 2022 val = mumps->val; 2023 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2024 remain = mumps->nnz - count; 2025 while (count > 0) { 2026 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2027 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2028 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2029 irn += count; 2030 jcn += count; 2031 val += count; 2032 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2033 remain -= count; 2034 } 2035 } 2036 } else { 2037 nreqs = 0; 2038 if (mumps->is_omp_master) { 2039 val = mumps->val + mumps->recvcount[0]; 2040 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 2041 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2042 remain = mumps->recvcount[i] - count; 2043 while (count > 0) { 2044 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2045 val += count; 2046 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2047 remain -= count; 2048 } 2049 } 2050 } else { 2051 val = mumps->val; 2052 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2053 remain = mumps->nnz - count; 2054 while (count > 0) { 2055 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2056 val += count; 2057 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2058 remain -= count; 2059 } 2060 } 2061 } 2062 
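      /* All slices have been posted with nonblocking calls; block here until every Isend/Irecv has completed
         before the (possibly reallocated) irn/jcn/val arrays are handed to MUMPS */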
PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2063 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 2064 } 2065 PetscFunctionReturn(PETSC_SUCCESS); 2066 } 2067 2068 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2069 { 2070 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2071 PetscBool isMPIAIJ; 2072 2073 PetscFunctionBegin; 2074 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 2075 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2076 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2081 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2082 2083 /* numerical factorization phase */ 2084 mumps->id.job = JOB_FACTNUMERIC; 2085 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2086 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 2087 } else { 2088 mumps->id.a_loc = (MumpsScalar *)mumps->val; 2089 } 2090 PetscMUMPS_c(mumps); 2091 if (mumps->id.INFOG(1) < 0) { 2092 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2093 if (mumps->id.INFOG(1) == -10) { 2094 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2095 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2096 } else if (mumps->id.INFOG(1) == -13) { 2097 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2098 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2099 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2100 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2101 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2102 } else { 2103 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2104 F->factorerrortype = MAT_FACTOR_OTHER; 2105 } 2106 } 2107 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2108 2109 F->assembled = PETSC_TRUE; 2110 2111 if (F->schur) { /* reset Schur status to unfactored */ 2112 #if defined(PETSC_HAVE_CUDA) 2113 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2114 #endif 2115 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2116 mumps->id.ICNTL(19) = 2; 2117 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2118 } 2119 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2120 } 2121 2122 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2123 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2124 2125 if (!mumps->is_omp_master) 
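    /* ranks that are not OMP masters never call MUMPS and hold no piece of the distributed solution;
       clear INFO(23) (used below as the length of sol_loc) so their sol_loc allocation is empty */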
mumps->id.INFO(23) = 0; 2126 if (mumps->petsc_size > 1) { 2127 PetscInt lsol_loc; 2128 PetscScalar *sol_loc; 2129 2130 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2131 2132 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2133 if (mumps->x_seq) { 2134 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2135 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2136 PetscCall(VecDestroy(&mumps->x_seq)); 2137 } 2138 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2139 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2140 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2141 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2142 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2143 } 2144 PetscCall(PetscLogFlops((double)mumps->id.RINFO(2))); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 /* Sets MUMPS options from the options database */ 2149 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2150 { 2151 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2152 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2153 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2154 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2155 MumpsScalar *arr; 2156 2157 PetscFunctionBegin; 2158 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2159 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2160 PetscInt nthreads = 0; 2161 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2162 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2163 PetscMUMPSInt nblk, *blkvar, *blkptr; 2164 2165 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2166 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2167 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2168 2169 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2170 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2171 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2172 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2173 if (mumps->use_petsc_omp_support) { 2174 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2175 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2176 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2177 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2178 #else 2179 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2180 ((PetscObject)F)->prefix ? 
((PetscObject)F)->prefix : ""); 2181 #endif 2182 } else { 2183 mumps->omp_comm = PETSC_COMM_SELF; 2184 mumps->mumps_comm = mumps->petsc_comm; 2185 mumps->is_omp_master = PETSC_TRUE; 2186 } 2187 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2188 mumps->reqs = NULL; 2189 mumps->tag = 0; 2190 2191 if (mumps->mumps_comm != MPI_COMM_NULL) { 2192 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2193 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2194 MPI_Comm comm; 2195 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2196 mumps->mumps_comm = comm; 2197 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2198 } 2199 2200 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2201 mumps->id.job = JOB_INIT; 2202 mumps->id.par = 1; /* host participates factorizaton and solve */ 2203 mumps->id.sym = mumps->sym; 2204 2205 size = mumps->id.size_schur; 2206 arr = mumps->id.schur; 2207 listvar_schur = mumps->id.listvar_schur; 2208 nblk = mumps->id.nblk; 2209 blkvar = mumps->id.blkvar; 2210 blkptr = mumps->id.blkptr; 2211 if (PetscDefined(USE_DEBUG)) { 2212 for (PetscInt i = 0; i < size; i++) 2213 PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 2214 A->rmap->N); 2215 } 2216 2217 PetscMUMPS_c(mumps); 2218 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2219 2220 /* set PETSc-MUMPS default options - override MUMPS default */ 2221 mumps->id.ICNTL(3) = 0; 2222 mumps->id.ICNTL(4) = 0; 2223 if (mumps->petsc_size == 1) { 2224 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2225 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2226 } else { 2227 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2228 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2229 } 2230 if (nblk && blkptr) { 2231 mumps->id.ICNTL(15) = 1; 2232 mumps->id.nblk = nblk; 2233 mumps->id.blkvar = blkvar; 2234 mumps->id.blkptr = blkptr; 2235 } 2236 2237 /* restore cached ICNTL and CNTL values */ 2238 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2239 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2240 PetscCall(PetscFree(mumps->ICNTL_pre)); 2241 PetscCall(PetscFree(mumps->CNTL_pre)); 2242 2243 if (schur) { 2244 mumps->id.size_schur = size; 2245 mumps->id.schur_lld = size; 2246 mumps->id.schur = arr; 2247 mumps->id.listvar_schur = listvar_schur; 2248 if (mumps->petsc_size > 1) { 2249 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2250 2251 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2252 gs = mumps->myid ? (mumps->id.size_schur ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2253 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2254 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2255 } else { 2256 if (F->factortype == MAT_FACTOR_LU) { 2257 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2258 } else { 2259 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2260 } 2261 } 2262 mumps->id.ICNTL(26) = -1; 2263 } 2264 2265 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2266 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2267 */ 2268 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2269 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2270 2271 mumps->scat_rhs = NULL; 2272 mumps->scat_sol = NULL; 2273 } 2274 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2275 if (flg) mumps->id.ICNTL(1) = icntl; 2276 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2277 if (flg) mumps->id.ICNTL(2) = icntl; 2278 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2279 if (flg) mumps->id.ICNTL(3) = icntl; 2280 2281 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2282 if (flg) mumps->id.ICNTL(4) = icntl; 2283 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2284 2285 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2286 if (flg) mumps->id.ICNTL(6) = icntl; 2287 2288 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2289 if (flg) { 2290 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2291 mumps->id.ICNTL(7) = icntl; 2292 } 2293 2294 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2295 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2296 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2297 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2298 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2299 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2300 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2301 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2302 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2303 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2304 if (flg) { 2305 if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2306 else if (mumps->id.ICNTL(15) > 0) { 2307 const PetscInt *bsizes; 2308 PetscInt nblocks, p, *blkptr = NULL; 2309 PetscMPIInt *recvcounts, *displs, n; 2310 PetscMPIInt rank, size = 0; 2311 2312 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2313 flg = PETSC_TRUE; 2314 for (p = 0; p < nblocks; ++p) { 2315 if (bsizes[p] > 1) break; 2316 } 2317 if (p == nblocks) flg = PETSC_FALSE; 2318 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2319 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2320 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2321 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2322 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2323 PetscCall(PetscMPIIntCast(nblocks, &n)); 2324 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2325 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2326 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2327 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2328 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, 
PetscObjectComm((PetscObject)A))); 2329 if (rank == 0) { 2330 blkptr[0] = 1; 2331 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2332 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2333 } 2334 PetscCall(PetscFree2(recvcounts, displs)); 2335 PetscCall(PetscFree(blkptr)); 2336 } 2337 } 2338 } 2339 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2340 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2341 PetscCall(MatDestroy(&F->schur)); 2342 PetscCall(MatMumpsResetSchur_Private(mumps)); 2343 } 2344 2345 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2346 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2347 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2348 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2349 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2350 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2351 */ 2352 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2353 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2354 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2355 #endif 2356 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2357 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10", (int)mumps->ICNTL20); 2358 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2359 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2360 #endif 2361 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2362 2363 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2364 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2365 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2366 if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2367 2368 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2369 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2370 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2371 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2372 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2373 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2374 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2375 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2376 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2377 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2378 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2379 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2380 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", 
"ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2381 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2382 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2383 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2384 2385 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2386 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2387 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2388 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2389 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2390 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2391 2392 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2393 2394 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2395 if (ninfo) { 2396 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2397 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2398 mumps->ninfo = ninfo; 2399 for (i = 0; i < ninfo; i++) { 2400 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2401 mumps->info[i] = info[i]; 2402 } 2403 } 2404 PetscOptionsEnd(); 2405 PetscFunctionReturn(PETSC_SUCCESS); 2406 } 2407 2408 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2409 { 2410 PetscFunctionBegin; 2411 if (mumps->id.INFOG(1) < 0) { 2412 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2413 if (mumps->id.INFOG(1) == -6) { 2414 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2415 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2416 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2417 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2418 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2419 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 2420 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2421 } else { 2422 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2423 F->factorerrortype = 
MAT_FACTOR_OTHER; 2424 } 2425 } 2426 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2427 PetscFunctionReturn(PETSC_SUCCESS); 2428 } 2429 2430 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2431 { 2432 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2433 Vec b; 2434 const PetscInt M = A->rmap->N; 2435 2436 PetscFunctionBegin; 2437 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2438 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2439 PetscFunctionReturn(PETSC_SUCCESS); 2440 } 2441 2442 /* Set MUMPS options from the options database */ 2443 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2444 2445 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2446 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2447 2448 /* analysis phase */ 2449 mumps->id.job = JOB_FACTSYMBOLIC; 2450 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2451 switch (mumps->id.ICNTL(18)) { 2452 case 0: /* centralized assembled matrix input */ 2453 if (!mumps->myid) { 2454 mumps->id.nnz = mumps->nnz; 2455 mumps->id.irn = mumps->irn; 2456 mumps->id.jcn = mumps->jcn; 2457 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2458 if (r && mumps->id.ICNTL(7) == 7) { 2459 mumps->id.ICNTL(7) = 1; 2460 if (!mumps->myid) { 2461 const PetscInt *idx; 2462 PetscInt i; 2463 2464 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2465 PetscCall(ISGetIndices(r, &idx)); 2466 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2467 PetscCall(ISRestoreIndices(r, &idx)); 2468 } 2469 } 2470 } 2471 break; 2472 case 3: /* distributed assembled matrix input (size>1) */ 2473 mumps->id.nnz_loc = mumps->nnz; 2474 mumps->id.irn_loc = mumps->irn; 2475 mumps->id.jcn_loc = mumps->jcn; 2476 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2477 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
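     (This path is taken when mumps->ICNTL20 was left at or forced to 0 in MatSetFromOptions_MUMPS(), e.g. with MUMPS
     older than 5.3.0 or with MPI stacks affected by the MPI_IN_PLACE issues noted there.)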
Create scatter scat_rhs for repeated use in MatSolve() */ 2478 PetscCall(MatCreateVecs(A, NULL, &b)); 2479 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2480 PetscCall(VecDestroy(&b)); 2481 } 2482 break; 2483 } 2484 PetscMUMPS_c(mumps); 2485 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2486 2487 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2488 F->ops->solve = MatSolve_MUMPS; 2489 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2490 F->ops->matsolve = MatMatSolve_MUMPS; 2491 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2492 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2493 2494 mumps->matstruc = SAME_NONZERO_PATTERN; 2495 PetscFunctionReturn(PETSC_SUCCESS); 2496 } 2497 2498 /* Note the PETSc r and c permutations are ignored */ 2499 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2500 { 2501 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2502 Vec b; 2503 const PetscInt M = A->rmap->N; 2504 2505 PetscFunctionBegin; 2506 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2507 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2508 PetscFunctionReturn(PETSC_SUCCESS); 2509 } 2510 2511 /* Set MUMPS options from the options database */ 2512 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2513 2514 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2515 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2516 2517 /* analysis phase */ 2518 mumps->id.job = JOB_FACTSYMBOLIC; 2519 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2520 switch (mumps->id.ICNTL(18)) { 2521 case 0: /* centralized assembled matrix input */ 2522 if (!mumps->myid) { 2523 mumps->id.nnz = mumps->nnz; 2524 mumps->id.irn = mumps->irn; 2525 mumps->id.jcn = mumps->jcn; 2526 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2527 } 2528 break; 2529 case 3: /* distributed assembled matrix input (size>1) */ 2530 mumps->id.nnz_loc = mumps->nnz; 2531 mumps->id.irn_loc = mumps->irn; 2532 mumps->id.jcn_loc = mumps->jcn; 2533 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2534 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2535 PetscCall(MatCreateVecs(A, NULL, &b)); 2536 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2537 PetscCall(VecDestroy(&b)); 2538 } 2539 break; 2540 } 2541 PetscMUMPS_c(mumps); 2542 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2543 2544 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2545 F->ops->solve = MatSolve_MUMPS; 2546 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2547 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2548 2549 mumps->matstruc = SAME_NONZERO_PATTERN; 2550 PetscFunctionReturn(PETSC_SUCCESS); 2551 } 2552 2553 /* Note the PETSc r permutation and factor info are ignored */ 2554 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 2555 { 2556 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2557 Vec b; 2558 const PetscInt M = A->rmap->N; 2559 2560 PetscFunctionBegin; 2561 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2562 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 /* Set MUMPS options from the options database */ 2567 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2568 2569 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2570 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2571 2572 /* analysis phase */ 2573 mumps->id.job = JOB_FACTSYMBOLIC; 2574 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2575 switch (mumps->id.ICNTL(18)) { 2576 case 0: /* centralized assembled matrix input */ 2577 if (!mumps->myid) { 2578 mumps->id.nnz = mumps->nnz; 2579 mumps->id.irn = mumps->irn; 2580 mumps->id.jcn = mumps->jcn; 2581 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2582 } 2583 break; 2584 case 3: /* distributed assembled matrix input (size>1) */ 2585 mumps->id.nnz_loc = mumps->nnz; 2586 mumps->id.irn_loc = mumps->irn; 2587 mumps->id.jcn_loc = mumps->jcn; 2588 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2589 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2590 PetscCall(MatCreateVecs(A, NULL, &b)); 2591 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2592 PetscCall(VecDestroy(&b)); 2593 } 2594 break; 2595 } 2596 PetscMUMPS_c(mumps); 2597 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2598 2599 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2600 F->ops->solve = MatSolve_MUMPS; 2601 F->ops->solvetranspose = MatSolve_MUMPS; 2602 F->ops->matsolve = MatMatSolve_MUMPS; 2603 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2604 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2605 #if defined(PETSC_USE_COMPLEX) 2606 F->ops->getinertia = NULL; 2607 #else 2608 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2609 #endif 2610 2611 mumps->matstruc = SAME_NONZERO_PATTERN; 2612 PetscFunctionReturn(PETSC_SUCCESS); 2613 } 2614 2615 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2616 { 2617 PetscBool isascii; 2618 PetscViewerFormat format; 2619 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2620 2621 PetscFunctionBegin; 2622 /* check if matrix is mumps type */ 2623 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2624 2625 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2626 if (isascii) { 2627 PetscCall(PetscViewerGetFormat(viewer, &format)); 2628 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2629 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2630 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2631 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2632 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2633 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2634 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2635 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2636 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2637 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2638 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2639 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2640 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2641 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2642 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2643 if (mumps->id.ICNTL(11) > 0) { 2644 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)mumps->id.RINFOG(4))); 2645 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)mumps->id.RINFOG(5))); 2646 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)mumps->id.RINFOG(6))); 2647 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8))); 2648 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", 
(double)mumps->id.RINFOG(9))); 2649 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11))); 2650 } 2651 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2652 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2653 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2654 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2655 /* ICNTL(16-17) not used */ 2656 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2657 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2658 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2659 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2660 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2661 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory that can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2662 2663 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2664 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2665 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2666 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2667 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2668 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2669 2670 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2671 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors are discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2672 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2673 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2674 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2675 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 2676 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2677 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 2678 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 2679 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2680 2681 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n",
(double)mumps->id.CNTL(1))); 2682 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2))); 2683 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)mumps->id.CNTL(3))); 2684 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)mumps->id.CNTL(4))); 2685 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)mumps->id.CNTL(5))); 2686 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)mumps->id.CNTL(7))); 2687 2688 /* information local to each processor */ 2689 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2690 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2691 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1))); 2692 PetscCall(PetscViewerFlush(viewer)); 2693 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2694 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2))); 2695 PetscCall(PetscViewerFlush(viewer)); 2696 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2697 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3))); 2698 PetscCall(PetscViewerFlush(viewer)); 2699 2700 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n")); 2701 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2702 PetscCall(PetscViewerFlush(viewer)); 2703 2704 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n")); 2705 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2706 PetscCall(PetscViewerFlush(viewer)); 2707 2708 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2709 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2710 PetscCall(PetscViewerFlush(viewer)); 2711 2712 if (mumps->ninfo && mumps->ninfo <= 80) { 2713 PetscInt i; 2714 for (i = 0; i < mumps->ninfo; i++) { 2715 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2716 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2717 PetscCall(PetscViewerFlush(viewer)); 2718 } 2719 } 2720 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2721 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ?
((PetscObject)A)->prefix : "")); 2722 2723 if (mumps->myid == 0) { /* information from the host */ 2724 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1))); 2725 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2))); 2726 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3))); 2727 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2728 2729 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2730 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2731 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2732 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2733 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2734 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2735 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2736 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2737 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2738 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2739 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2740 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14))); 2741 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2742 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2743 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2744 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2745 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2746 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the
factors): %d\n", mumps->id.INFOG(20))); 2747 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2748 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2749 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2750 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2751 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2752 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2753 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2754 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2755 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2756 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2757 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2758 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2759 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2760 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2761 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2762 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2763 } 2764 } 2765 } 2766 PetscFunctionReturn(PETSC_SUCCESS); 2767 } 2768 2769 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 2770 { 2771 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2772 2773 PetscFunctionBegin; 2774 info->block_size = 1.0; 2775 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2776 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2777 info->nz_unneeded = 0.0; 2778 info->assemblies = 0.0; 2779 info->mallocs = 0.0; 2780 info->memory = 0.0; 2781 info->fill_ratio_given = 0; 2782 info->fill_ratio_needed = 0; 2783 info->factor_mallocs = 0; 2784 PetscFunctionReturn(PETSC_SUCCESS); 2785 } 2786 2787 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2788 { 2789 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2790 const PetscScalar *arr; 2791 const PetscInt *idxs; 2792 PetscInt size, i; 2793 2794 PetscFunctionBegin; 2795 PetscCall(ISGetLocalSize(is, &size)); 2796 /* Schur complement matrix */ 2797 PetscCall(MatDestroy(&F->schur)); 2798 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2799 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2800 mumps->id.schur = (MumpsScalar *)arr; 2801 PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 2802 PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 2803 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2804 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2805 2806 /* MUMPS expects Fortran style indices */ 2807 PetscCall(PetscFree(mumps->id.listvar_schur)); 2808 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2809 PetscCall(ISGetIndices(is, &idxs)); 2810 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 2811 PetscCall(ISRestoreIndices(is, &idxs)); 2812 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2813 mumps->id.ICNTL(26) = -1; 2814 PetscFunctionReturn(PETSC_SUCCESS); 2815 } 2816 2817 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2818 { 2819 Mat St; 2820 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2821 PetscScalar *array; 2822 2823 PetscFunctionBegin; 2824 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected!
Call MatFactorSetSchurIS() to enable it"); 2825 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2826 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2827 PetscCall(MatSetType(St, MATDENSE)); 2828 PetscCall(MatSetUp(St)); 2829 PetscCall(MatDenseGetArray(St, &array)); 2830 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2831 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2832 PetscInt i, j, N = mumps->id.size_schur; 2833 for (i = 0; i < N; i++) { 2834 for (j = 0; j < N; j++) { 2835 #if !defined(PETSC_USE_COMPLEX) 2836 PetscScalar val = mumps->id.schur[i * N + j]; 2837 #else 2838 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2839 #endif 2840 array[j * N + i] = val; 2841 } 2842 } 2843 } else { /* stored by columns */ 2844 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2845 } 2846 } else { /* either full or lower-triangular (not packed) */ 2847 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2848 PetscInt i, j, N = mumps->id.size_schur; 2849 for (i = 0; i < N; i++) { 2850 for (j = i; j < N; j++) { 2851 #if !defined(PETSC_USE_COMPLEX) 2852 PetscScalar val = mumps->id.schur[i * N + j]; 2853 #else 2854 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2855 #endif 2856 array[i * N + j] = array[j * N + i] = val; 2857 } 2858 } 2859 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2860 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2861 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2862 PetscInt i, j, N = mumps->id.size_schur; 2863 for (i = 0; i < N; i++) { 2864 for (j = 0; j < i + 1; j++) { 2865 #if !defined(PETSC_USE_COMPLEX) 2866 PetscScalar val = mumps->id.schur[i * N + j]; 2867 #else 2868 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2869 #endif 2870 array[i * N + j] = array[j * N + i] = val; 2871 } 2872 } 2873 } 2874 } 2875 PetscCall(MatDenseRestoreArray(St, &array)); 2876 *S = St; 2877 PetscFunctionReturn(PETSC_SUCCESS); 2878 } 2879 2880 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2881 { 2882 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2883 2884 PetscFunctionBegin; 2885 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2886 PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2887 for (i = 0; i < nICNTL_pre; ++i) 2888 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2889 if (i == nICNTL_pre) { /* not already cached */ 2890 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2891 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2892 mumps->ICNTL_pre[0]++; 2893 } 2894 mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 2895 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2896 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2897 PetscFunctionReturn(PETSC_SUCCESS); 2898 } 2899 2900 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2901 { 2902 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2903 2904 PetscFunctionBegin; 2905 if (mumps->id.job == JOB_NULL) { 2906 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0; 2907 *ival = 0; 2908 for (i = 0; i < nICNTL_pre; ++i) { 2909 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2910 } 2911 } else *ival = mumps->id.ICNTL(icntl); 2912 PetscFunctionReturn(PETSC_SUCCESS); 2913 } 2914 2915 /*@ 2916 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2917 2918 Logically Collective 2919 2920 Input Parameters: 2921 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2922 . icntl - index of MUMPS parameter array `ICNTL()` 2923 - ival - value of MUMPS `ICNTL(icntl)` 2924 2925 Options Database Key: 2926 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 2927 2928 Level: beginner 2929 2930 Note: 2931 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 2932 2933 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2934 @*/ 2935 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2936 { 2937 PetscFunctionBegin; 2938 PetscValidType(F, 1); 2939 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2940 PetscValidLogicalCollectiveInt(F, icntl, 2); 2941 PetscValidLogicalCollectiveInt(F, ival, 3); 2942 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2943 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2944 PetscFunctionReturn(PETSC_SUCCESS); 2945 } 2946 2947 /*@ 2948 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 2949 2950 Logically Collective 2951 2952 Input Parameters: 2953 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 2954 - icntl - index of MUMPS parameter array ICNTL() 2955 2956 Output Parameter: 2957 . ival - value of MUMPS ICNTL(icntl) 2958 2959 Level: beginner 2960 2961 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2962 @*/ 2963 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2964 { 2965 PetscFunctionBegin; 2966 PetscValidType(F, 1); 2967 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2968 PetscValidLogicalCollectiveInt(F, icntl, 2); 2969 PetscAssertPointer(ival, 3); 2970 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2971 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2972 PetscFunctionReturn(PETSC_SUCCESS); 2973 } 2974 2975 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2976 { 2977 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2978 2979 PetscFunctionBegin; 2980 if (mumps->id.job == JOB_NULL) { 2981 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2982 for (i = 0; i < nCNTL_pre; ++i) 2983 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2984 if (i == nCNTL_pre) { 2985 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2986 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2987 mumps->CNTL_pre[0]++; 2988 } 2989 mumps->CNTL_pre[1 + 2 * i] = icntl; 2990 mumps->CNTL_pre[2 + 2 * i] = val; 2991 } else mumps->id.CNTL(icntl) = val; 2992 PetscFunctionReturn(PETSC_SUCCESS); 2993 } 2994 2995 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2996 { 2997 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2998 2999 PetscFunctionBegin; 3000 if (mumps->id.job == JOB_NULL) { 3001 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3002 *val = 0.0; 3003 for (i = 0; i < nCNTL_pre; ++i) { 3004 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 3005 } 3006 } else *val = mumps->id.CNTL(icntl); 3007 PetscFunctionReturn(PETSC_SUCCESS); 3008 } 3009 3010 /*@ 3011 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3012 3013 Logically Collective 3014 3015 Input Parameters: 3016 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3017 . icntl - index of MUMPS parameter array `CNTL()` 3018 - val - value of MUMPS `CNTL(icntl)` 3019 3020 Options Database Key: 3021 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3022 3023 Level: beginner 3024 3025 Note: 3026 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3027 3028 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3029 @*/ 3030 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3031 { 3032 PetscFunctionBegin; 3033 PetscValidType(F, 1); 3034 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3035 PetscValidLogicalCollectiveInt(F, icntl, 2); 3036 PetscValidLogicalCollectiveReal(F, val, 3); 3037 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3038 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3039 PetscFunctionReturn(PETSC_SUCCESS); 3040 } 3041 3042 /*@ 3043 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3044 3045 Logically Collective 3046 3047 Input Parameters: 3048 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3049 - icntl - index of MUMPS parameter array CNTL() 3050 3051 Output Parameter: 3052 . 
val - value of MUMPS CNTL(icntl) 3053 3054 Level: beginner 3055 3056 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3057 @*/ 3058 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3059 { 3060 PetscFunctionBegin; 3061 PetscValidType(F, 1); 3062 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3063 PetscValidLogicalCollectiveInt(F, icntl, 2); 3064 PetscAssertPointer(val, 3); 3065 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3066 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3067 PetscFunctionReturn(PETSC_SUCCESS); 3068 } 3069 3070 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3071 { 3072 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3073 3074 PetscFunctionBegin; 3075 *info = mumps->id.INFO(icntl); 3076 PetscFunctionReturn(PETSC_SUCCESS); 3077 } 3078 3079 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3080 { 3081 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3082 3083 PetscFunctionBegin; 3084 *infog = mumps->id.INFOG(icntl); 3085 PetscFunctionReturn(PETSC_SUCCESS); 3086 } 3087 3088 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3089 { 3090 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3091 3092 PetscFunctionBegin; 3093 *rinfo = mumps->id.RINFO(icntl); 3094 PetscFunctionReturn(PETSC_SUCCESS); 3095 } 3096 3097 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3098 { 3099 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3100 3101 PetscFunctionBegin; 3102 *rinfog = mumps->id.RINFOG(icntl); 3103 PetscFunctionReturn(PETSC_SUCCESS); 3104 } 3105 3106 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3107 { 3108 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3109 3110 PetscFunctionBegin; 3111 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3112 *size = 0; 3113 *array = NULL; 3114 if (!mumps->myid) { 3115 *size = mumps->id.INFOG(28); 3116 PetscCall(PetscMalloc1(*size, array)); 3117 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3118 } 3119 PetscFunctionReturn(PETSC_SUCCESS); 3120 } 3121 3122 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3123 { 3124 Mat Bt = NULL, Btseq = NULL; 3125 PetscBool flg; 3126 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3127 PetscScalar *aa; 3128 PetscInt spnr, *ia, *ja, M, nrhs; 3129 3130 PetscFunctionBegin; 3131 PetscAssertPointer(spRHS, 2); 3132 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3133 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3134 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3135 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3136 3137 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3138 3139 if (mumps->petsc_size > 1) { 3140 
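/* MUMPS reads a sparse right-hand side (ICNTL(20)=1, set below) only on the host, so grab the sequential
   (diagonal) block of the MPIAIJ matrix here; only rank 0's block is passed to MUMPS in the code that follows. */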
Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3141 Btseq = b->A; 3142 } else { 3143 Btseq = Bt; 3144 } 3145 3146 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3147 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3148 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3149 mumps->id.rhs = NULL; 3150 3151 if (!mumps->myid) { 3152 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3153 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3154 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3155 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3156 mumps->id.rhs_sparse = (MumpsScalar *)aa; 3157 } else { 3158 mumps->id.irhs_ptr = NULL; 3159 mumps->id.irhs_sparse = NULL; 3160 mumps->id.nz_rhs = 0; 3161 mumps->id.rhs_sparse = NULL; 3162 } 3163 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3164 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3165 3166 /* solve phase */ 3167 mumps->id.job = JOB_SOLVE; 3168 PetscMUMPS_c(mumps); 3169 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3170 3171 if (!mumps->myid) { 3172 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3173 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3174 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3175 } 3176 PetscFunctionReturn(PETSC_SUCCESS); 3177 } 3178 3179 /*@ 3180 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3181 3182 Logically Collective 3183 3184 Input Parameter: 3185 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3186 3187 Output Parameter: 3188 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3189 3190 Level: beginner 3191 3192 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3193 @*/ 3194 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3195 { 3196 PetscFunctionBegin; 3197 PetscValidType(F, 1); 3198 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3199 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3200 PetscFunctionReturn(PETSC_SUCCESS); 3201 } 3202 3203 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3204 { 3205 Mat spRHS; 3206 3207 PetscFunctionBegin; 3208 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3209 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3210 PetscCall(MatDestroy(&spRHS)); 3211 PetscFunctionReturn(PETSC_SUCCESS); 3212 } 3213 3214 /*@ 3215 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3216 3217 Logically Collective 3218 3219 Input Parameter: 3220 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3221 3222 Output Parameter: 3223 . 
spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3224 3225 Level: beginner 3226 3227 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3228 @*/ 3229 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3230 { 3231 PetscBool flg; 3232 3233 PetscFunctionBegin; 3234 PetscValidType(F, 1); 3235 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3236 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3237 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3238 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3239 PetscFunctionReturn(PETSC_SUCCESS); 3240 } 3241 3242 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3243 { 3244 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3245 3246 PetscFunctionBegin; 3247 if (nblk) { 3248 PetscAssertPointer(blkptr, 4); 3249 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3250 PetscCall(PetscFree(mumps->id.blkptr)); 3251 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3252 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3253 mumps->id.ICNTL(15) = 1; 3254 if (blkvar) { 3255 PetscCall(PetscFree(mumps->id.blkvar)); 3256 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3257 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3258 } 3259 } else { 3260 PetscCall(PetscFree(mumps->id.blkptr)); 3261 PetscCall(PetscFree(mumps->id.blkvar)); 3262 mumps->id.ICNTL(15) = 0; 3263 } 3264 PetscFunctionReturn(PETSC_SUCCESS); 3265 } 3266 3267 /*@ 3268 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3269 3270 Not collective, only relevant on the first process of the MPI communicator 3271 3272 Input Parameters: 3273 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3274 . nblk - the number of blocks 3275 . 
blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3276 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3277 3278 Level: advanced 3279 3280 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3281 @*/ 3282 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3283 { 3284 PetscFunctionBegin; 3285 PetscValidType(F, 1); 3286 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3287 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3288 PetscFunctionReturn(PETSC_SUCCESS); 3289 } 3290 3291 /*@ 3292 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3293 3294 Logically Collective 3295 3296 Input Parameters: 3297 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3298 - icntl - index of MUMPS parameter array INFO() 3299 3300 Output Parameter: 3301 . ival - value of MUMPS INFO(icntl) 3302 3303 Level: beginner 3304 3305 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3306 @*/ 3307 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3308 { 3309 PetscFunctionBegin; 3310 PetscValidType(F, 1); 3311 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3312 PetscAssertPointer(ival, 3); 3313 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3314 PetscFunctionReturn(PETSC_SUCCESS); 3315 } 3316 3317 /*@ 3318 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3319 3320 Logically Collective 3321 3322 Input Parameters: 3323 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3324 - icntl - index of MUMPS parameter array INFOG() 3325 3326 Output Parameter: 3327 . 
ival - value of MUMPS INFOG(icntl) 3328 3329 Level: beginner 3330 3331 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3332 @*/ 3333 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3334 { 3335 PetscFunctionBegin; 3336 PetscValidType(F, 1); 3337 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3338 PetscAssertPointer(ival, 3); 3339 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3340 PetscFunctionReturn(PETSC_SUCCESS); 3341 } 3342 3343 /*@ 3344 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3345 3346 Logically Collective 3347 3348 Input Parameters: 3349 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3350 - icntl - index of MUMPS parameter array RINFO() 3351 3352 Output Parameter: 3353 . val - value of MUMPS RINFO(icntl) 3354 3355 Level: beginner 3356 3357 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3358 @*/ 3359 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3360 { 3361 PetscFunctionBegin; 3362 PetscValidType(F, 1); 3363 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3364 PetscAssertPointer(val, 3); 3365 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3366 PetscFunctionReturn(PETSC_SUCCESS); 3367 } 3368 3369 /*@ 3370 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3371 3372 Logically Collective 3373 3374 Input Parameters: 3375 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3376 - icntl - index of MUMPS parameter array RINFOG() 3377 3378 Output Parameter: 3379 . val - value of MUMPS RINFOG(icntl) 3380 3381 Level: beginner 3382 3383 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3384 @*/ 3385 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3386 { 3387 PetscFunctionBegin; 3388 PetscValidType(F, 1); 3389 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3390 PetscAssertPointer(val, 3); 3391 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3392 PetscFunctionReturn(PETSC_SUCCESS); 3393 } 3394 3395 /*@ 3396 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3397 3398 Logically Collective 3399 3400 Input Parameter: 3401 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3402 3403 Output Parameters: 3404 + size - local size of the array. 
The size of the array is non-zero only on MPI rank 0 3405 - array - array of rows with null pivots; these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3406 for freeing this array. 3407 3408 Level: beginner 3409 3410 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3411 @*/ 3412 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3413 { 3414 PetscFunctionBegin; 3415 PetscValidType(F, 1); 3416 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3417 PetscAssertPointer(size, 2); 3418 PetscAssertPointer(array, 3); 3419 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3420 PetscFunctionReturn(PETSC_SUCCESS); 3421 } 3422 3423 /*MC 3424 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3425 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3426 3427 Works with `MATAIJ` and `MATSBAIJ` matrices 3428 3429 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3430 3431 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3432 See details below. 3433 3434 Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver 3435 3436 Options Database Keys: 3437 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3438 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3439 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3440 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3441 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3442 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3443 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3444 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3445 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3446 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3447 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3448 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3449 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3450 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3451 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3452 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3453 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3454 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 3455 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3456 .
-mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3457 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3458 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3459 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3460 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3461 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3462 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3463 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3464 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3465 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3466 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3467 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3468 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3469 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3470 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3471 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3472 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3473 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3474 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3475 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3476 The default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual. 3477 3478 Level: beginner 3479 3480 Notes: 3481 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>), so using it will 3482 error if the matrix is Hermitian. 3483 3484 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3485 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3486 3487 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3488 the failure with 3489 .vb 3490 KSPGetPC(ksp,&pc); 3491 PCFactorGetMatrix(pc,&mat); 3492 MatMumpsGetInfo(mat,....); 3493 MatMumpsGetInfog(mat,....); etc. 3494 .ve 3495 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3496 3497 MUMPS provides 64-bit integer support in two build modes: 3498 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3499 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3500 3501 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3502 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3503 columns.
This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3504 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3505 3506 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3507 3508 Two modes to run MUMPS/PETSc with OpenMP 3509 .vb 3510 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3511 threads per rank, then you may use "export `OMP_NUM_THREADS`=16 && mpirun -n 4 ./test". 3512 .ve 3513 3514 .vb 3515 `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example, 3516 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3517 .ve 3518 3519 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3520 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3521 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3522 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3523 (PETSc will automatically try to use a threaded BLAS if `--with-openmp` is provided). 3524 3525 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3526 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3527 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3528 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (their binding might be set 3529 by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs. 3530 On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose your compute node has two sockets: 3531 if you interleave MPI ranks across the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3532 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half 3533 of the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the 3534 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3535 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3536 examine the mapping result. 3537 3538 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3539 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread.
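   For concreteness, a sketch of a job-script fragment combining the pieces above (the executable name ./test, the rank count 64, and the thread count 16 are placeholders; adjust for your machine and scheduler):
.vb
     export OMP_PROC_BIND=spread
     export OMP_PLACES=threads
     srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16
.ve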
One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3540 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3541 3542 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3543 3544 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3545 M*/ 3546 3547 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3548 { 3549 PetscFunctionBegin; 3550 *type = MATSOLVERMUMPS; 3551 PetscFunctionReturn(PETSC_SUCCESS); 3552 } 3553 3554 /* MatGetFactor for Seq and MPI AIJ matrices */ 3555 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3556 { 3557 Mat B; 3558 Mat_MUMPS *mumps; 3559 PetscBool isSeqAIJ, isDiag, isDense; 3560 PetscMPIInt size; 3561 3562 PetscFunctionBegin; 3563 #if defined(PETSC_USE_COMPLEX) 3564 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3565 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3566 *F = NULL; 3567 PetscFunctionReturn(PETSC_SUCCESS); 3568 } 3569 #endif 3570 /* Create the factorization matrix */ 3571 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3572 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3573 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3574 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3575 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3576 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3577 PetscCall(MatSetUp(B)); 3578 3579 PetscCall(PetscNew(&mumps)); 3580 3581 B->ops->view = MatView_MUMPS; 3582 B->ops->getinfo = MatGetInfo_MUMPS; 3583 3584 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3585 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3586 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3587 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3588 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3589 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3590 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3591 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3592 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3593 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3594 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3597 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 3599 3600 if (ftype == MAT_FACTOR_LU) { 3601 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3602 B->factortype = MAT_FACTOR_LU; 3603 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3604 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3605 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3606 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3607 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3608 mumps->sym = 0; 3609 } else { 3610 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3611 B->factortype = MAT_FACTOR_CHOLESKY; 3612 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3613 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3614 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 3615 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3616 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3617 #if defined(PETSC_USE_COMPLEX) 3618 mumps->sym = 2; 3619 #else 3620 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3621 else mumps->sym = 2; 3622 #endif 3623 } 3624 3625 /* set solvertype */ 3626 PetscCall(PetscFree(B->solvertype)); 3627 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3628 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3629 if (size == 1) { 3630 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3631 B->canuseordering = PETSC_TRUE; 3632 } 3633 B->ops->destroy = MatDestroy_MUMPS; 3634 B->data = (void *)mumps; 3635 3636 *F = B; 3637 mumps->id.job = JOB_NULL; 3638 mumps->ICNTL_pre = NULL; 3639 mumps->CNTL_pre = NULL; 3640 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3641 PetscFunctionReturn(PETSC_SUCCESS); 3642 } 3643 3644 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3645 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 3646 { 3647 Mat B; 3648 Mat_MUMPS *mumps; 3649 PetscBool isSeqSBAIJ; 3650 PetscMPIInt size; 3651 3652 PetscFunctionBegin; 3653 #if defined(PETSC_USE_COMPLEX) 3654 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3655 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3656 *F = NULL; 3657 PetscFunctionReturn(PETSC_SUCCESS); 3658 } 3659 #endif 3660 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3661 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3662 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3663 PetscCall(MatSetUp(B)); 3664 3665 PetscCall(PetscNew(&mumps)); 3666 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3667 if (isSeqSBAIJ) { 3668 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3669 } else { 3670 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3671 } 3672 3673 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3674 B->ops->view = MatView_MUMPS; 3675 B->ops->getinfo = MatGetInfo_MUMPS; 3676 3677 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3678 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3679 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3680 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3681 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3682 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3683 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3684 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3685 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3686 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3687 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3688 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3689 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3690 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3691 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 3692 3693 B->factortype = MAT_FACTOR_CHOLESKY; 3694 #if defined(PETSC_USE_COMPLEX) 3695 mumps->sym = 2; 3696 #else 3697 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3698 else mumps->sym = 2; 3699 #endif 3700 3701 /* set solvertype */ 3702 PetscCall(PetscFree(B->solvertype)); 3703 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3704 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3705 if (size == 1) { 3706 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3707 B->canuseordering = PETSC_TRUE; 3708 } 3709 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3710 B->ops->destroy = MatDestroy_MUMPS; 3711 B->data = (void *)mumps; 3712 3713 *F = B; 3714 mumps->id.job = JOB_NULL; 3715 mumps->ICNTL_pre = NULL; 3716 mumps->CNTL_pre = NULL; 3717 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3718 PetscFunctionReturn(PETSC_SUCCESS); 3719 } 3720 3721 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType 
ftype, Mat *F) 3722 { 3723 Mat B; 3724 Mat_MUMPS *mumps; 3725 PetscBool isSeqBAIJ; 3726 PetscMPIInt size; 3727 3728 PetscFunctionBegin; 3729 /* Create the factorization matrix */ 3730 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3731 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3732 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3733 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3734 PetscCall(MatSetUp(B)); 3735 3736 PetscCall(PetscNew(&mumps)); 3737 PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3738 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3739 B->factortype = MAT_FACTOR_LU; 3740 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3741 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3742 mumps->sym = 0; 3743 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3744 3745 B->ops->view = MatView_MUMPS; 3746 B->ops->getinfo = MatGetInfo_MUMPS; 3747 3748 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3749 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3750 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3751 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3752 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3753 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3754 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3755 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3756 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3757 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3758 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3759 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3760 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3761 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3762 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 3763 3764 /* set solvertype */ 3765 PetscCall(PetscFree(B->solvertype)); 3766 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3767 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3768 if (size == 1) { 3769 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3770 B->canuseordering = PETSC_TRUE; 3771 } 3772 B->ops->destroy = MatDestroy_MUMPS; 3773 B->data = (void *)mumps; 3774 3775 *F = B; 3776 mumps->id.job = JOB_NULL; 3777 mumps->ICNTL_pre = NULL; 3778 mumps->CNTL_pre = NULL; 3779 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3780 PetscFunctionReturn(PETSC_SUCCESS); 3781 } 3782 3783 /* MatGetFactor for Seq and MPI 
SELL matrices */ 3784 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3785 { 3786 Mat B; 3787 Mat_MUMPS *mumps; 3788 PetscBool isSeqSELL; 3789 PetscMPIInt size; 3790 3791 PetscFunctionBegin; 3792 /* Create the factorization matrix */ 3793 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3794 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3795 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3796 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3797 PetscCall(MatSetUp(B)); 3798 3799 PetscCall(PetscNew(&mumps)); 3800 3801 B->ops->view = MatView_MUMPS; 3802 B->ops->getinfo = MatGetInfo_MUMPS; 3803 3804 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3805 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3806 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3807 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3808 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3809 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3810 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3811 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3812 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3813 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3814 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3815 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3816 3817 PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3818 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3819 B->factortype = MAT_FACTOR_LU; 3820 PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3821 mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3822 mumps->sym = 0; 3823 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3824 3825 /* set solvertype */ 3826 PetscCall(PetscFree(B->solvertype)); 3827 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3828 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3829 if (size == 1) { 3830 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3831 B->canuseordering = PETSC_TRUE; 3832 } 3833 B->ops->destroy = MatDestroy_MUMPS; 3834 B->data = (void *)mumps; 3835 3836 *F = B; 3837 mumps->id.job = JOB_NULL; 3838 mumps->ICNTL_pre = NULL; 3839 mumps->CNTL_pre = NULL; 3840 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3841 PetscFunctionReturn(PETSC_SUCCESS); 3842 } 3843 3844 /* MatGetFactor for MATNEST matrices */ 3845 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3846 { 3847 Mat B, **mats; 3848 Mat_MUMPS *mumps; 3849 PetscInt nr, nc; 3850 PetscMPIInt size; 3851 PetscBool flg = PETSC_TRUE; 3852 3853 PetscFunctionBegin; 3854 #if 
defined(PETSC_USE_COMPLEX) 3855 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3856 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3857 *F = NULL; 3858 PetscFunctionReturn(PETSC_SUCCESS); 3859 } 3860 #endif 3861 3862 /* Return if some condition is not satisfied */ 3863 *F = NULL; 3864 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 3865 if (ftype == MAT_FACTOR_CHOLESKY) { 3866 IS *rows, *cols; 3867 PetscInt *m, *M; 3868 3869 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 3870 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 3871 PetscCall(MatNestGetISs(A, rows, cols)); 3872 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 3873 if (!flg) { 3874 PetscCall(PetscFree2(rows, cols)); 3875 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 3876 PetscFunctionReturn(PETSC_SUCCESS); 3877 } 3878 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 3879 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 3880 for (PetscInt r = 0; flg && r < nr; r++) 3881 for (PetscInt k = r + 1; flg && k < nr; k++) 3882 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 3883 PetscCall(PetscFree2(m, M)); 3884 PetscCall(PetscFree2(rows, cols)); 3885 if (!flg) { 3886 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 3887 PetscFunctionReturn(PETSC_SUCCESS); 3888 } 3889 } 3890 3891 for (PetscInt r = 0; r < nr; r++) { 3892 for (PetscInt c = 0; c < nc; c++) { 3893 Mat sub = mats[r][c]; 3894 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 3895 3896 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 3897 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 3898 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 3899 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 3900 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 3901 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 3902 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 3903 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 3904 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 3905 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 3906 if (ftype == MAT_FACTOR_CHOLESKY) { 3907 if (r == c) { 3908 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 3909 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3910 flg = PETSC_FALSE; 3911 } 3912 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3913 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 3914 flg = PETSC_FALSE; 3915 } 3916 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 3917 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not 
supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 3918 flg = PETSC_FALSE; 3919 } 3920 } 3921 } 3922 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 3923 3924 /* Create the factorization matrix */ 3925 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3926 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3927 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 3928 PetscCall(MatSetUp(B)); 3929 3930 PetscCall(PetscNew(&mumps)); 3931 3932 B->ops->view = MatView_MUMPS; 3933 B->ops->getinfo = MatGetInfo_MUMPS; 3934 3935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3946 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3947 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3948 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3949 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 3950 3951 if (ftype == MAT_FACTOR_LU) { 3952 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3953 B->factortype = MAT_FACTOR_LU; 3954 mumps->sym = 0; 3955 } else { 3956 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3957 B->factortype = MAT_FACTOR_CHOLESKY; 3958 #if defined(PETSC_USE_COMPLEX) 3959 mumps->sym = 2; 3960 #else 3961 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3962 else mumps->sym = 2; 3963 #endif 3964 } 3965 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 3966 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 3967 3968 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3969 if (size == 1) { 3970 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3971 B->canuseordering = PETSC_TRUE; 3972 } 3973 3974 /* set solvertype */ 3975 PetscCall(PetscFree(B->solvertype)); 3976 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3977 B->ops->destroy = MatDestroy_MUMPS; 3978 B->data = (void *)mumps; 3979 3980 *F = B; 3981 mumps->id.job = JOB_NULL; 3982 mumps->ICNTL_pre = NULL; 3983 mumps->CNTL_pre = NULL; 3984 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3985 PetscFunctionReturn(PETSC_SUCCESS); 3986 } 3987 3988 
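/*
   Illustrative usage sketch (kept in a comment; not compiled here). It assumes application code that includes petscksp.h and has already
   assembled a Mat A and Vec b, x. It shows one common way to select MUMPS through the PC factor interface and then tune MUMPS controls with
   the MatMumps* functions implemented in this file; the ICNTL value used is only an example.

     KSP ksp;
     PC  pc;
     Mat F;

     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));                          // or PCCHOLESKY for symmetric problems
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc));               // create the factor matrix so it can be configured before factorization
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 24, 1));                   // e.g. ICNTL(24)=1 enables null-pivot detection
     PetscCall(KSPSolve(ksp, b, x));

   The same controls are available from the command line, e.g. -pc_factor_mat_solver_type mumps -mat_mumps_icntl_24 1.
*/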
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3989 { 3990 PetscFunctionBegin; 3991 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3992 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3993 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3994 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 3995 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 3996 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 3997 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 3998 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 3999 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4000 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4001 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4002 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4003 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4004 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4005 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4006 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4007 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4008 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 4009 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 4010 PetscFunctionReturn(PETSC_SUCCESS); 4011 } 4012