/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's.
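     (PetscMUMPSInt is MUMPS's 32-bit MUMPS_INT, see the typedef above, while PetscInt may be 32- or 64-bit.)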
It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 87 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 88 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 89 if (set) *set = myset; 90 PetscFunctionReturn(PETSC_SUCCESS); 91 } 92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 93 94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 95 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 96 #define PetscMUMPS_c(mumps) \ 97 do { \ 98 if (mumps->use_petsc_omp_support) { \ 99 if (mumps->is_omp_master) { \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 101 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 102 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 103 PetscCall(PetscFPTrapPop()); \ 104 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 105 } \ 106 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 107 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 108 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 109 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 110 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 111 */ \ 112 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 113 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \ 114 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 115 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \ 116 } else { \ 117 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 118 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 119 PetscCall(PetscFPTrapPop()); \ 120 } \ 121 } while (0) 122 #else 123 #define PetscMUMPS_c(mumps) \ 124 do { \ 125 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 126 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \ 127 PetscCall(PetscFPTrapPop()); \ 128 } while (0) 129 #endif 130 131 /* declare MumpsScalar */ 132 #if defined(PETSC_USE_COMPLEX) 133 #if defined(PETSC_USE_REAL_SINGLE) 134 #define MumpsScalar mumps_complex 135 #else 136 #define MumpsScalar mumps_double_complex 137 #endif 138 #else 139 #define MumpsScalar PetscScalar 140 #endif 141 142 /* macros s.t. indices match MUMPS documentation */ 143 #define ICNTL(I) icntl[(I)-1] 144 #define CNTL(I) cntl[(I)-1] 145 #define INFOG(I) infog[(I)-1] 146 #define INFO(I) info[(I)-1] 147 #define RINFOG(I) rinfog[(I)-1] 148 #define RINFO(I) rinfo[(I)-1] 149 150 typedef struct Mat_MUMPS Mat_MUMPS; 151 struct Mat_MUMPS { 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_REAL_SINGLE) 154 CMUMPS_STRUC_C id; 155 #else 156 ZMUMPS_STRUC_C id; 157 #endif 158 #else 159 #if defined(PETSC_USE_REAL_SINGLE) 160 SMUMPS_STRUC_C id; 161 #else 162 DMUMPS_STRUC_C id; 163 #endif 164 #endif 165 166 MatStructure matstruc; 167 PetscMPIInt myid, petsc_size; 168 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. 
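irn/jcn hold the row and column indices; the matching numerical values are kept in val below.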
*/ 169 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 170 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 171 PetscMUMPSInt sym; 172 MPI_Comm mumps_comm; 173 PetscMUMPSInt *ICNTL_pre; 174 PetscReal *CNTL_pre; 175 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 176 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 177 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 178 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 179 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 180 PetscInt *rhs_nrow, max_nrhs; 181 PetscMPIInt *rhs_recvcounts, *rhs_disps; 182 PetscScalar *rhs_loc, *rhs_recvbuf; 183 #endif 184 Vec b_seq, x_seq; 185 PetscInt ninfo, *info; /* which INFO to display */ 186 PetscInt sizeredrhs; 187 PetscScalar *schur_sol; 188 PetscInt schur_sizesol; 189 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 190 PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 191 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 192 193 /* Support for MATNEST */ 194 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 195 PetscInt64 *nest_vals_start; 196 PetscScalar *nest_vals; 197 198 /* stuff used by petsc/mumps OpenMP support*/ 199 PetscBool use_petsc_omp_support; 200 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 201 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */ 202 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 203 PetscMPIInt tag, omp_comm_size; 204 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 205 MPI_Request *reqs; 206 }; 207 208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 209 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 210 */ 211 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 212 { 213 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. 
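Hence the '- 1' above when computing nnz from the last entry of the row pointer ia[].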
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 214 215 PetscFunctionBegin; 216 #if defined(PETSC_USE_64BIT_INDICES) 217 { 218 PetscInt i; 219 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 220 PetscCall(PetscFree(mumps->ia_alloc)); 221 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 222 mumps->cur_ilen = nrow + 1; 223 } 224 if (nnz > mumps->cur_jlen) { 225 PetscCall(PetscFree(mumps->ja_alloc)); 226 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 227 mumps->cur_jlen = nnz; 228 } 229 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]))); 230 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]))); 231 *ia_mumps = mumps->ia_alloc; 232 *ja_mumps = mumps->ja_alloc; 233 } 234 #else 235 *ia_mumps = ia; 236 *ja_mumps = ja; 237 #endif 238 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 243 { 244 PetscFunctionBegin; 245 PetscCall(PetscFree(mumps->id.listvar_schur)); 246 PetscCall(PetscFree(mumps->id.redrhs)); 247 PetscCall(PetscFree(mumps->schur_sol)); 248 mumps->id.size_schur = 0; 249 mumps->id.schur_lld = 0; 250 mumps->id.ICNTL(19) = 0; 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 /* solve with rhs in mumps->id.redrhs and return in the same location */ 255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 256 { 257 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 258 Mat S, B, X; 259 MatFactorSchurStatus schurstatus; 260 PetscInt sizesol; 261 262 PetscFunctionBegin; 263 PetscCall(MatFactorFactorizeSchurComplement(F)); 264 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 265 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B)); 266 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 268 PetscCall(MatBindToCPU(B, S->boundtocpu)); 269 #endif 270 switch (schurstatus) { 271 case MAT_FACTOR_SCHUR_FACTORED: 272 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); 273 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 275 PetscCall(MatBindToCPU(X, S->boundtocpu)); 276 #endif 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 PetscCall(MatMatSolveTranspose(S, B, X)); 279 } else { 280 PetscCall(MatMatSolve(S, B, X)); 281 } 282 break; 283 case MAT_FACTOR_SCHUR_INVERTED: 284 sizesol = mumps->id.nrhs * mumps->id.size_schur; 285 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 286 PetscCall(PetscFree(mumps->schur_sol)); 287 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 288 mumps->schur_sizesol = sizesol; 289 } 290 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 291 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 293 PetscCall(MatBindToCPU(X, S->boundtocpu)); 294 #endif 295 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 296 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 297 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 298 } else { 299 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 300 } 301 PetscCall(MatProductSetFromOptions(X)); 302 PetscCall(MatProductSymbolic(X)); 303 PetscCall(MatProductNumeric(X)); 304 305 
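    /* X now holds the action of the inverted Schur complement on B (transposed action when ICNTL(9) requests a transpose solve);
       the MatCopy() below writes it back into B, whose array is id.redrhs, which is where MUMPS expects the reduced solution */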
    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij format
    shift - 0: C style output triples; 1: Fortran style output triples.
    reuse - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
            MAT_REUSE_MATRIX: only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The arrays r, c, and sometimes v are obtained in a single PetscMalloc() and are freed with PetscFree(mumps->irn) in
  MatDestroy_MUMPS(). This is not ideal: the fact that v is ONLY sometimes part of mumps->irn means that the
  PetscMalloc() cannot easily be replaced with a PetscMalloc3().
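  As a small illustration (not code taken from this file): with shift = 1, the 2x2 AIJ matrix [2 3; 0 5] stored by rows
  yields nnz = 3 and the triples r = {1, 1, 2}, c = {1, 2, 2}, v = {2, 3, 5}.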
366 367 */ 368 369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 370 { 371 const PetscScalar *av; 372 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 373 PetscInt64 nz, rnz, i, j, k; 374 PetscMUMPSInt *row, *col; 375 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 376 377 PetscFunctionBegin; 378 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 379 if (reuse == MAT_INITIAL_MATRIX) { 380 nz = aa->nz; 381 ai = aa->i; 382 aj = aa->j; 383 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 384 for (i = k = 0; i < M; i++) { 385 rnz = ai[i + 1] - ai[i]; 386 ajj = aj + ai[i]; 387 for (j = 0; j < rnz; j++) { 388 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 389 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 390 k++; 391 } 392 } 393 mumps->val = (PetscScalar *)av; 394 mumps->irn = row; 395 mumps->jcn = col; 396 mumps->nnz = nz; 397 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 398 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 399 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 404 { 405 PetscInt64 nz, i, j, k, r; 406 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 407 PetscMUMPSInt *row, *col; 408 409 PetscFunctionBegin; 410 nz = a->sliidx[a->totalslices]; 411 if (reuse == MAT_INITIAL_MATRIX) { 412 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 413 for (i = k = 0; i < a->totalslices; i++) { 414 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 415 } 416 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 417 mumps->irn = row; 418 mumps->jcn = col; 419 mumps->nnz = nz; 420 mumps->val = a->val; 421 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 422 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 427 { 428 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 429 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 430 PetscInt64 M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 431 PetscInt bs; 432 PetscMUMPSInt *row, *col; 433 434 PetscFunctionBegin; 435 if (reuse == MAT_INITIAL_MATRIX) { 436 PetscCall(MatGetBlockSize(A, &bs)); 437 M = A->rmap->N / bs; 438 ai = aa->i; 439 aj = aa->j; 440 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 441 for (i = 0; i < M; i++) { 442 ajj = aj + ai[i]; 443 rnz = ai[i + 1] - ai[i]; 444 for (k = 0; k < rnz; k++) { 445 for (j = 0; j < bs; j++) { 446 for (m = 0; m < bs; m++) { 447 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 448 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 449 idx++; 450 } 451 } 452 } 453 } 454 mumps->irn = row; 455 mumps->jcn = col; 456 mumps->nnz = nz; 457 mumps->val = aa->a; 458 } else if (mumps->nest_vals) 
PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 459 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 464 { 465 const PetscInt *ai, *aj, *ajj; 466 PetscInt bs; 467 PetscInt64 nz, rnz, i, j, k, m; 468 PetscMUMPSInt *row, *col; 469 PetscScalar *val; 470 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 471 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 472 #if defined(PETSC_USE_COMPLEX) 473 PetscBool isset, hermitian; 474 #endif 475 476 PetscFunctionBegin; 477 #if defined(PETSC_USE_COMPLEX) 478 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 479 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 480 #endif 481 ai = aa->i; 482 aj = aa->j; 483 PetscCall(MatGetBlockSize(A, &bs)); 484 if (reuse == MAT_INITIAL_MATRIX) { 485 const PetscInt64 alloc_size = aa->nz * bs2; 486 487 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 488 if (bs > 1) { 489 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 490 mumps->val = mumps->val_alloc; 491 } else { 492 mumps->val = aa->a; 493 } 494 mumps->irn = row; 495 mumps->jcn = col; 496 } else { 497 row = mumps->irn; 498 col = mumps->jcn; 499 } 500 val = mumps->val; 501 502 nz = 0; 503 if (bs > 1) { 504 for (i = 0; i < mbs; i++) { 505 rnz = ai[i + 1] - ai[i]; 506 ajj = aj + ai[i]; 507 for (j = 0; j < rnz; j++) { 508 for (k = 0; k < bs; k++) { 509 for (m = 0; m < bs; m++) { 510 if (ajj[j] > i || k >= m) { 511 if (reuse == MAT_INITIAL_MATRIX) { 512 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 513 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 514 } 515 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 516 } 517 } 518 } 519 } 520 } 521 } else if (reuse == MAT_INITIAL_MATRIX) { 522 for (i = 0; i < mbs; i++) { 523 rnz = ai[i + 1] - ai[i]; 524 ajj = aj + ai[i]; 525 for (j = 0; j < rnz; j++) { 526 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 527 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 528 nz++; 529 } 530 } 531 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz); 532 } else if (mumps->nest_vals) 533 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 534 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 535 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 536 PetscFunctionReturn(PETSC_SUCCESS); 537 } 538 539 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 540 { 541 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 542 PetscInt64 nz, rnz, i, j; 543 const PetscScalar *av, *v1; 544 PetscScalar *val; 545 PetscMUMPSInt *row, *col; 546 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 547 PetscBool missing; 548 #if defined(PETSC_USE_COMPLEX) 549 PetscBool hermitian, isset; 550 #endif 551 552 PetscFunctionBegin; 553 #if 
defined(PETSC_USE_COMPLEX) 554 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 555 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 556 #endif 557 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 558 ai = aa->i; 559 aj = aa->j; 560 adiag = aa->diag; 561 PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL)); 562 if (reuse == MAT_INITIAL_MATRIX) { 563 /* count nz in the upper triangular part of A */ 564 nz = 0; 565 if (missing) { 566 for (i = 0; i < M; i++) { 567 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 568 for (j = ai[i]; j < ai[i + 1]; j++) { 569 if (aj[j] < i) continue; 570 nz++; 571 } 572 } else { 573 nz += ai[i + 1] - adiag[i]; 574 } 575 } 576 } else { 577 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 578 } 579 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 580 PetscCall(PetscMalloc1(nz, &val)); 581 mumps->nnz = nz; 582 mumps->irn = row; 583 mumps->jcn = col; 584 mumps->val = mumps->val_alloc = val; 585 586 nz = 0; 587 if (missing) { 588 for (i = 0; i < M; i++) { 589 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 590 for (j = ai[i]; j < ai[i + 1]; j++) { 591 if (aj[j] < i) continue; 592 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 593 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 594 val[nz] = av[j]; 595 nz++; 596 } 597 } else { 598 rnz = ai[i + 1] - adiag[i]; 599 ajj = aj + adiag[i]; 600 v1 = av + adiag[i]; 601 for (j = 0; j < rnz; j++) { 602 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 603 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 604 val[nz++] = v1[j]; 605 } 606 } 607 } 608 } else { 609 for (i = 0; i < M; i++) { 610 rnz = ai[i + 1] - adiag[i]; 611 ajj = aj + adiag[i]; 612 v1 = av + adiag[i]; 613 for (j = 0; j < rnz; j++) { 614 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 615 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 616 val[nz++] = v1[j]; 617 } 618 } 619 } 620 } else { 621 nz = 0; 622 val = mumps->val; 623 if (missing) { 624 for (i = 0; i < M; i++) { 625 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 626 for (j = ai[i]; j < ai[i + 1]; j++) { 627 if (aj[j] < i) continue; 628 val[nz++] = av[j]; 629 } 630 } else { 631 rnz = ai[i + 1] - adiag[i]; 632 v1 = av + adiag[i]; 633 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 634 } 635 } 636 } else { 637 for (i = 0; i < M; i++) { 638 rnz = ai[i + 1] - adiag[i]; 639 v1 = av + adiag[i]; 640 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 641 } 642 } 643 } 644 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 649 { 650 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 651 PetscInt bs; 652 PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB; 653 PetscMUMPSInt *row, *col; 654 const PetscScalar *av, *bv, *v1, *v2; 655 PetscScalar *val; 656 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 657 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data; 658 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 659 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 660 #if defined(PETSC_USE_COMPLEX) 661 PetscBool hermitian, isset; 662 #endif 663 664 PetscFunctionBegin; 665 #if defined(PETSC_USE_COMPLEX) 666 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 667 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 668 #endif 669 
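  /* An MPISBAIJ matrix stores its diagonal block mat->A (SeqSBAIJ, upper triangle only) and its off-diagonal block mat->B (SeqBAIJ)
     separately; below both are merged into a single list of triples, with B's compressed column indices translated back to global
     columns through mat->garray */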
PetscCall(MatGetBlockSize(A, &bs)); 670 rstart = A->rmap->rstart; 671 ai = aa->i; 672 aj = aa->j; 673 bi = bb->i; 674 bj = bb->j; 675 av = aa->a; 676 bv = bb->a; 677 678 garray = mat->garray; 679 680 if (reuse == MAT_INITIAL_MATRIX) { 681 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 682 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 683 PetscCall(PetscMalloc1(nz, &val)); 684 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 685 mumps->irn = row; 686 mumps->jcn = col; 687 mumps->val = mumps->val_alloc = val; 688 } else { 689 val = mumps->val; 690 } 691 692 jj = 0; 693 irow = rstart; 694 for (i = 0; i < mbs; i++) { 695 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 696 countA = ai[i + 1] - ai[i]; 697 countB = bi[i + 1] - bi[i]; 698 bjj = bj + bi[i]; 699 v1 = av + ai[i] * bs2; 700 v2 = bv + bi[i] * bs2; 701 702 if (bs > 1) { 703 /* A-part */ 704 for (j = 0; j < countA; j++) { 705 for (k = 0; k < bs; k++) { 706 for (m = 0; m < bs; m++) { 707 if (rstart + ajj[j] * bs > irow || k >= m) { 708 if (reuse == MAT_INITIAL_MATRIX) { 709 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 710 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 711 } 712 val[jj++] = v1[j * bs2 + m + k * bs]; 713 } 714 } 715 } 716 } 717 718 /* B-part */ 719 for (j = 0; j < countB; j++) { 720 for (k = 0; k < bs; k++) { 721 for (m = 0; m < bs; m++) { 722 if (reuse == MAT_INITIAL_MATRIX) { 723 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 724 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 725 } 726 val[jj++] = v2[j * bs2 + m + k * bs]; 727 } 728 } 729 } 730 } else { 731 /* A-part */ 732 for (j = 0; j < countA; j++) { 733 if (reuse == MAT_INITIAL_MATRIX) { 734 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 735 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 736 } 737 val[jj++] = v1[j]; 738 } 739 740 /* B-part */ 741 for (j = 0; j < countB; j++) { 742 if (reuse == MAT_INITIAL_MATRIX) { 743 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 744 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 745 } 746 val[jj++] = v2[j]; 747 } 748 } 749 irow += bs; 750 } 751 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 756 { 757 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 758 PetscInt64 rstart, cstart, nz, i, j, jj, irow, countA, countB; 759 PetscMUMPSInt *row, *col; 760 const PetscScalar *av, *bv, *v1, *v2; 761 PetscScalar *val; 762 Mat Ad, Ao; 763 Mat_SeqAIJ *aa; 764 Mat_SeqAIJ *bb; 765 766 PetscFunctionBegin; 767 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 768 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 769 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 770 771 aa = (Mat_SeqAIJ *)(Ad)->data; 772 bb = (Mat_SeqAIJ *)(Ao)->data; 773 ai = aa->i; 774 aj = aa->j; 775 bi = bb->i; 776 bj = bb->j; 777 778 rstart = A->rmap->rstart; 779 cstart = A->cmap->rstart; 780 781 if (reuse == MAT_INITIAL_MATRIX) { 782 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 783 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 784 PetscCall(PetscMalloc1(nz, &val)); 785 mumps->nnz = nz; 786 mumps->irn = row; 787 mumps->jcn = col; 788 mumps->val = mumps->val_alloc = val; 789 } else { 790 val = mumps->val; 791 } 792 793 jj = 0; 794 irow = rstart; 795 for (i = 0; i < m; i++) { 796 ajj = aj + 
ai[i]; /* ptr to the beginning of this row */ 797 countA = ai[i + 1] - ai[i]; 798 countB = bi[i + 1] - bi[i]; 799 bjj = bj + bi[i]; 800 v1 = av + ai[i]; 801 v2 = bv + bi[i]; 802 803 /* A-part */ 804 for (j = 0; j < countA; j++) { 805 if (reuse == MAT_INITIAL_MATRIX) { 806 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 807 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 808 } 809 val[jj++] = v1[j]; 810 } 811 812 /* B-part */ 813 for (j = 0; j < countB; j++) { 814 if (reuse == MAT_INITIAL_MATRIX) { 815 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 816 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 817 } 818 val[jj++] = v2[j]; 819 } 820 irow++; 821 } 822 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 823 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 827 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 828 { 829 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 830 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data; 831 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data; 832 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 833 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 834 const PetscInt bs2 = mat->bs2; 835 PetscInt bs; 836 PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx; 837 PetscMUMPSInt *row, *col; 838 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 839 PetscScalar *val; 840 841 PetscFunctionBegin; 842 PetscCall(MatGetBlockSize(A, &bs)); 843 if (reuse == MAT_INITIAL_MATRIX) { 844 nz = bs2 * (aa->nz + bb->nz); 845 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 846 PetscCall(PetscMalloc1(nz, &val)); 847 mumps->nnz = nz; 848 mumps->irn = row; 849 mumps->jcn = col; 850 mumps->val = mumps->val_alloc = val; 851 } else { 852 val = mumps->val; 853 } 854 855 jj = 0; 856 irow = rstart; 857 for (i = 0; i < mbs; i++) { 858 countA = ai[i + 1] - ai[i]; 859 countB = bi[i + 1] - bi[i]; 860 ajj = aj + ai[i]; 861 bjj = bj + bi[i]; 862 v1 = av + bs2 * ai[i]; 863 v2 = bv + bs2 * bi[i]; 864 865 idx = 0; 866 /* A-part */ 867 for (k = 0; k < countA; k++) { 868 for (j = 0; j < bs; j++) { 869 for (n = 0; n < bs; n++) { 870 if (reuse == MAT_INITIAL_MATRIX) { 871 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 872 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 873 } 874 val[jj++] = v1[idx++]; 875 } 876 } 877 } 878 879 idx = 0; 880 /* B-part */ 881 for (k = 0; k < countB; k++) { 882 for (j = 0; j < bs; j++) { 883 for (n = 0; n < bs; n++) { 884 if (reuse == MAT_INITIAL_MATRIX) { 885 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 886 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 887 } 888 val[jj++] = v2[idx++]; 889 } 890 } 891 } 892 irow += bs; 893 } 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 898 { 899 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 900 PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 901 PetscMUMPSInt *row, *col; 902 const PetscScalar *av, *bv, *v1, *v2; 903 PetscScalar *val; 904 Mat Ad, Ao; 905 Mat_SeqAIJ *aa; 906 Mat_SeqAIJ *bb; 907 #if defined(PETSC_USE_COMPLEX) 908 PetscBool hermitian, isset; 909 #endif 910 911 PetscFunctionBegin; 912 #if defined(PETSC_USE_COMPLEX) 913 PetscCall(MatIsHermitianKnown(A, 
&isset, &hermitian)); 914 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 915 #endif 916 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 917 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 918 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 919 920 aa = (Mat_SeqAIJ *)(Ad)->data; 921 bb = (Mat_SeqAIJ *)(Ao)->data; 922 ai = aa->i; 923 aj = aa->j; 924 adiag = aa->diag; 925 bi = bb->i; 926 bj = bb->j; 927 928 rstart = A->rmap->rstart; 929 930 if (reuse == MAT_INITIAL_MATRIX) { 931 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 932 nzb = 0; /* num of upper triangular entries in mat->B */ 933 for (i = 0; i < m; i++) { 934 nza += (ai[i + 1] - adiag[i]); 935 countB = bi[i + 1] - bi[i]; 936 bjj = bj + bi[i]; 937 for (j = 0; j < countB; j++) { 938 if (garray[bjj[j]] > rstart) nzb++; 939 } 940 } 941 942 nz = nza + nzb; /* total nz of upper triangular part of mat */ 943 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 944 PetscCall(PetscMalloc1(nz, &val)); 945 mumps->nnz = nz; 946 mumps->irn = row; 947 mumps->jcn = col; 948 mumps->val = mumps->val_alloc = val; 949 } else { 950 val = mumps->val; 951 } 952 953 jj = 0; 954 irow = rstart; 955 for (i = 0; i < m; i++) { 956 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 957 v1 = av + adiag[i]; 958 countA = ai[i + 1] - adiag[i]; 959 countB = bi[i + 1] - bi[i]; 960 bjj = bj + bi[i]; 961 v2 = bv + bi[i]; 962 963 /* A-part */ 964 for (j = 0; j < countA; j++) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 967 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 968 } 969 val[jj++] = v1[j]; 970 } 971 972 /* B-part */ 973 for (j = 0; j < countB; j++) { 974 if (garray[bjj[j]] > rstart) { 975 if (reuse == MAT_INITIAL_MATRIX) { 976 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 977 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 978 } 979 val[jj++] = v2[j]; 980 } 981 } 982 irow++; 983 } 984 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 985 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 990 { 991 const PetscScalar *av; 992 const PetscInt M = A->rmap->n; 993 PetscInt64 i; 994 PetscMUMPSInt *row, *col; 995 Vec v; 996 997 PetscFunctionBegin; 998 PetscCall(MatDiagonalGetDiagonal(A, &v)); 999 PetscCall(VecGetArrayRead(v, &av)); 1000 if (reuse == MAT_INITIAL_MATRIX) { 1001 PetscCall(PetscMalloc2(M, &row, M, &col)); 1002 for (i = 0; i < M; i++) { 1003 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1004 col[i] = row[i]; 1005 } 1006 mumps->val = (PetscScalar *)av; 1007 mumps->irn = row; 1008 mumps->jcn = col; 1009 mumps->nnz = M; 1010 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1011 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1012 PetscCall(VecRestoreArrayRead(v, &av)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1017 { 1018 Mat **mats; 1019 PetscInt nr, nc; 1020 PetscBool chol = 
mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1021 1022 PetscFunctionBegin; 1023 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1024 if (reuse == MAT_INITIAL_MATRIX) { 1025 PetscMUMPSInt *irns, *jcns; 1026 PetscScalar *vals; 1027 PetscInt64 totnnz, cumnnz, maxnnz; 1028 PetscInt *pjcns_w; 1029 IS *rows, *cols; 1030 PetscInt **rows_idx, **cols_idx; 1031 1032 cumnnz = 0; 1033 maxnnz = 0; 1034 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1035 for (PetscInt r = 0; r < nr; r++) { 1036 for (PetscInt c = 0; c < nc; c++) { 1037 Mat sub = mats[r][c]; 1038 1039 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1040 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1041 if (sub) { 1042 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1043 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag; 1044 MatInfo info; 1045 1046 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1047 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1048 else { 1049 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1050 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1051 } 1052 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1053 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1054 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1055 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1056 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1057 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1058 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1059 1060 if (chol) { 1061 if (r == c) { 1062 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1063 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1064 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1065 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1066 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1067 } else { 1068 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1069 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1070 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1071 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1072 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1073 } 1074 } else { 1075 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1076 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1077 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1078 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1079 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1080 } 1081 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1082 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1083 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1084 cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for 
Cholesky */ 1085 maxnnz = PetscMax(maxnnz, info.nz_used); 1086 } 1087 } 1088 } 1089 1090 /* Allocate total COO */ 1091 totnnz = cumnnz; 1092 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1093 PetscCall(PetscMalloc1(totnnz, &vals)); 1094 1095 /* Handle rows and column maps 1096 We directly map rows and use an SF for the columns */ 1097 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1098 PetscCall(MatNestGetISs(A, rows, cols)); 1099 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1100 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1101 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1102 else (void)maxnnz; 1103 1104 cumnnz = 0; 1105 for (PetscInt r = 0; r < nr; r++) { 1106 for (PetscInt c = 0; c < nc; c++) { 1107 Mat sub = mats[r][c]; 1108 const PetscInt *ridx = rows_idx[r]; 1109 const PetscInt *cidx = cols_idx[c]; 1110 PetscInt rst; 1111 PetscSF csf; 1112 PetscBool isTrans, isHTrans = PETSC_FALSE, swap; 1113 PetscLayout cmap; 1114 1115 mumps->nest_vals_start[r * nc + c] = cumnnz; 1116 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1117 1118 /* Extract inner blocks if needed */ 1119 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1120 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1121 else { 1122 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1123 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1124 } 1125 swap = (PetscBool)(isTrans || isHTrans); 1126 1127 /* Get column layout to map off-process columns */ 1128 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1129 1130 /* Get row start to map on-process rows */ 1131 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1132 1133 /* Directly use the mumps datastructure and use C ordering for now */ 1134 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1135 1136 /* Swap the role of rows and columns indices for transposed blocks 1137 since we need values with global final ordering */ 1138 if (swap) { 1139 cidx = rows_idx[r]; 1140 ridx = cols_idx[c]; 1141 } 1142 1143 /* Communicate column indices 1144 This could have been done with a single SF but it would have complicated the code a lot. 
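         (The SF below translates each block's locally numbered column indices into the global column numbering of the nested matrix, through the column IS of that block.)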
1145 But since we do it only once, we pay the price of setting up an SF for each block */ 1146 if (PetscDefined(USE_64BIT_INDICES)) { 1147 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1148 } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */ 1149 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1150 PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1151 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1152 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1153 PetscCall(PetscSFDestroy(&csf)); 1154 1155 /* Import indices: use direct map for rows and mapped indices for columns */ 1156 if (swap) { 1157 for (PetscInt k = 0; k < mumps->nnz; k++) { 1158 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1159 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1160 } 1161 } else { 1162 for (PetscInt k = 0; k < mumps->nnz; k++) { 1163 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1164 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1165 } 1166 } 1167 1168 /* Import values to full COO */ 1169 PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1170 if (isHTrans) { /* conjugate the entries */ 1171 PetscScalar *v = vals + cumnnz; 1172 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]); 1173 } 1174 1175 /* Shift new starting point and sanity check */ 1176 cumnnz += mumps->nnz; 1177 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1178 1179 /* Free scratch memory */ 1180 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1181 PetscCall(PetscFree(mumps->val_alloc)); 1182 mumps->val = NULL; 1183 mumps->nnz = 0; 1184 } 1185 } 1186 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1187 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1188 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1189 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1190 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz); 1191 mumps->nest_vals_start[nr * nc] = cumnnz; 1192 1193 /* Set pointers for final MUMPS data structure */ 1194 mumps->nest_vals = vals; 1195 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1196 mumps->val = vals; 1197 mumps->irn = irns; 1198 mumps->jcn = jcns; 1199 mumps->nnz = cumnnz; 1200 } else { 1201 PetscScalar *oval = mumps->nest_vals; 1202 for (PetscInt r = 0; r < nr; r++) { 1203 for (PetscInt c = 0; c < nc; c++) { 1204 PetscBool isTrans, isHTrans = PETSC_FALSE; 1205 Mat sub = mats[r][c]; 1206 PetscInt midx = r * nc + c; 1207 1208 if (!mumps->nest_convert_to_triples[midx]) continue; 1209 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1210 if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub)); 1211 else { 1212 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1213 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub)); 1214 } 1215 mumps->val = oval + mumps->nest_vals_start[midx]; 1216 
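        /* mumps->val now points at this block's slice of the aggregated values array, so the conversion below refreshes the numerical values in place */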
PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1217 if (isHTrans) { 1218 PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1219 for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]); 1220 } 1221 } 1222 } 1223 mumps->val = oval; 1224 } 1225 PetscFunctionReturn(PETSC_SUCCESS); 1226 } 1227 1228 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1229 { 1230 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1231 1232 PetscFunctionBegin; 1233 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 1234 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1235 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1236 PetscCall(VecDestroy(&mumps->b_seq)); 1237 PetscCall(VecDestroy(&mumps->x_seq)); 1238 PetscCall(PetscFree(mumps->id.perm_in)); 1239 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1240 PetscCall(PetscFree(mumps->val_alloc)); 1241 PetscCall(PetscFree(mumps->info)); 1242 PetscCall(PetscFree(mumps->ICNTL_pre)); 1243 PetscCall(PetscFree(mumps->CNTL_pre)); 1244 PetscCall(MatMumpsResetSchur_Private(mumps)); 1245 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1246 mumps->id.job = JOB_END; 1247 PetscMUMPS_c(mumps); 1248 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1249 if (mumps->mumps_comm != MPI_COMM_NULL) { 1250 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1251 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1252 } 1253 } 1254 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1255 if (mumps->use_petsc_omp_support) { 1256 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1257 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1258 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1259 } 1260 #endif 1261 PetscCall(PetscFree(mumps->ia_alloc)); 1262 PetscCall(PetscFree(mumps->ja_alloc)); 1263 PetscCall(PetscFree(mumps->recvcount)); 1264 PetscCall(PetscFree(mumps->reqs)); 1265 PetscCall(PetscFree(mumps->irhs_loc)); 1266 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1267 PetscCall(PetscFree(mumps->nest_vals)); 1268 PetscCall(PetscFree(A->data)); 1269 1270 /* clear composed functions */ 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1277 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1279 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1280 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1281 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1282 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1283 
PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1284 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1285 PetscFunctionReturn(PETSC_SUCCESS); 1286 } 1287 1288 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1289 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1290 { 1291 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1292 const PetscMPIInt ompsize = mumps->omp_comm_size; 1293 PetscInt i, m, M, rstart; 1294 1295 PetscFunctionBegin; 1296 PetscCall(MatGetSize(A, &M, NULL)); 1297 PetscCall(MatGetLocalSize(A, &m, NULL)); 1298 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1299 if (ompsize == 1) { 1300 if (!mumps->irhs_loc) { 1301 mumps->nloc_rhs = m; 1302 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1303 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1304 for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */ 1305 } 1306 mumps->id.rhs_loc = (MumpsScalar *)array; 1307 } else { 1308 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1309 const PetscInt *ranges; 1310 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1311 MPI_Group petsc_group, omp_group; 1312 PetscScalar *recvbuf = NULL; 1313 1314 if (mumps->is_omp_master) { 1315 /* Lazily initialize the omp stuff for distributed rhs */ 1316 if (!mumps->irhs_loc) { 1317 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1318 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1319 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1320 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1321 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1322 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1323 1324 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1325 mumps->nloc_rhs = 0; 1326 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1327 for (j = 0; j < ompsize; j++) { 1328 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1329 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1330 } 1331 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1332 for (j = k = 0; j < ompsize; j++) { 1333 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */ 1334 } 1335 1336 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1337 PetscCallMPI(MPI_Group_free(&petsc_group)); 1338 PetscCallMPI(MPI_Group_free(&omp_group)); 1339 } 1340 1341 /* Realloc buffers when current nrhs is bigger than what we have met */ 1342 if (nrhs > mumps->max_nrhs) { 1343 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1344 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1345 mumps->max_nrhs = nrhs; 1346 } 1347 1348 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1349 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1350 mumps->rhs_disps[0] = 0; 1351 for (j = 1; j < ompsize; j++) { 1352 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1353 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1354 } 1355 recvbuf = 
(nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1356 } 1357 1358 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1359 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1360 1361 if (mumps->is_omp_master) { 1362 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1363 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1364 for (j = 0; j < ompsize; j++) { 1365 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1366 dst = dstbase; 1367 for (i = 0; i < nrhs; i++) { 1368 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1369 src += mumps->rhs_nrow[j]; 1370 dst += mumps->nloc_rhs; 1371 } 1372 dstbase += mumps->rhs_nrow[j]; 1373 } 1374 } 1375 mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc; 1376 } 1377 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1378 } 1379 mumps->id.nrhs = nrhs; 1380 mumps->id.nloc_rhs = mumps->nloc_rhs; 1381 mumps->id.lrhs_loc = mumps->nloc_rhs; 1382 mumps->id.irhs_loc = mumps->irhs_loc; 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1387 { 1388 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1389 const PetscScalar *rarray = NULL; 1390 PetscScalar *array; 1391 IS is_iden, is_petsc; 1392 PetscInt i; 1393 PetscBool second_solve = PETSC_FALSE; 1394 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1395 1396 PetscFunctionBegin; 1397 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1398 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1399 &cite1)); 1400 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1401 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1402 &cite2)); 1403 1404 if (A->factorerrortype) { 1405 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1406 PetscCall(VecSetInf(x)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 mumps->id.nrhs = 1; 1411 if (mumps->petsc_size > 1) { 1412 if (mumps->ICNTL20 == 10) { 1413 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1414 PetscCall(VecGetArrayRead(b, &rarray)); 1415 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray)); 1416 } else { 1417 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1418 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1419 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1420 if (!mumps->myid) { 1421 PetscCall(VecGetArray(mumps->b_seq, &array)); 1422 mumps->id.rhs = (MumpsScalar *)array; 1423 } 1424 } 1425 } else { /* petsc_size == 1 */ 1426 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1427 PetscCall(VecCopy(b, x)); 1428 PetscCall(VecGetArray(x, &array)); 1429 mumps->id.rhs = (MumpsScalar *)array; 1430 } 1431 1432 /* 1433 handle condensation step of Schur complement (if any) 1434 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1435 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1436 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1437 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1438 */ 1439 if (mumps->id.size_schur > 0) { 1440 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1441 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1442 second_solve = PETSC_TRUE; 1443 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1444 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1445 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1446 } 1447 /* solve phase */ 1448 mumps->id.job = JOB_SOLVE; 1449 PetscMUMPS_c(mumps); 1450 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1451 1452 /* handle expansion step of Schur complement (if any) */ 1453 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1454 else if (mumps->id.ICNTL(26) == 1) { 1455 PetscCall(MatMumpsSolveSchur_Private(A)); 1456 for (i = 0; i < mumps->id.size_schur; ++i) { 1457 #if !defined(PETSC_USE_COMPLEX) 1458 PetscScalar val = mumps->id.redrhs[i]; 1459 #else 1460 PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i; 1461 #endif 1462 array[mumps->id.listvar_schur[i] - 1] = val; 1463 } 1464 } 1465 1466 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1467 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1468 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1469 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1470 } 1471 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1472 PetscInt *isol2_loc 
= NULL; 1473 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 1474 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 1475 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 1476 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 1477 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 1478 PetscCall(ISDestroy(&is_iden)); 1479 PetscCall(ISDestroy(&is_petsc)); 1480 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1481 } 1482 1483 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1484 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 1485 } 1486 1487 if (mumps->petsc_size > 1) { 1488 if (mumps->ICNTL20 == 10) { 1489 PetscCall(VecRestoreArrayRead(b, &rarray)); 1490 } else if (!mumps->myid) { 1491 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 1492 } 1493 } else PetscCall(VecRestoreArray(x, &array)); 1494 1495 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 1500 { 1501 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1502 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1503 1504 PetscFunctionBegin; 1505 mumps->id.ICNTL(9) = 0; 1506 PetscCall(MatSolve_MUMPS(A, b, x)); 1507 mumps->id.ICNTL(9) = value; 1508 PetscFunctionReturn(PETSC_SUCCESS); 1509 } 1510 1511 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 1512 { 1513 Mat Bt = NULL; 1514 PetscBool denseX, denseB, flg, flgT; 1515 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1516 PetscInt i, nrhs, M; 1517 PetscScalar *array; 1518 const PetscScalar *rbray; 1519 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 1520 PetscMUMPSInt *isol_loc, *isol_loc_save; 1521 PetscScalar *bray, *sol_loc, *sol_loc_save; 1522 IS is_to, is_from; 1523 PetscInt k, proc, j, m, myrstart; 1524 const PetscInt *rstart; 1525 Vec v_mpi, msol_loc; 1526 VecScatter scat_sol; 1527 Vec b_seq; 1528 VecScatter scat_rhs; 1529 PetscScalar *aa; 1530 PetscInt spnr, *ia, *ja; 1531 Mat_MPIAIJ *b = NULL; 1532 1533 PetscFunctionBegin; 1534 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 1535 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 1536 1537 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 1538 if (denseB) { 1539 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 1540 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1541 } else { /* sparse B */ 1542 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 1543 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 1544 if (flgT) { /* input B is transpose of actual RHS matrix, 1545 because mumps requires sparse compressed COLUMN storage! 
See MatMatTransposeSolve_MUMPS() */ 1546 PetscCall(MatTransposeGetMat(B, &Bt)); 1547 } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 1548 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 1549 } 1550 1551 PetscCall(MatGetSize(B, &M, &nrhs)); 1552 mumps->id.nrhs = nrhs; 1553 mumps->id.lrhs = M; 1554 mumps->id.rhs = NULL; 1555 1556 if (mumps->petsc_size == 1) { 1557 PetscScalar *aa; 1558 PetscInt spnr, *ia, *ja; 1559 PetscBool second_solve = PETSC_FALSE; 1560 1561 PetscCall(MatDenseGetArray(X, &array)); 1562 mumps->id.rhs = (MumpsScalar *)array; 1563 1564 if (denseB) { 1565 /* copy B to X */ 1566 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1567 PetscCall(PetscArraycpy(array, rbray, M * nrhs)); 1568 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1569 } else { /* sparse B */ 1570 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 1571 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1572 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1573 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1574 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1575 } 1576 /* handle condensation step of Schur complement (if any) */ 1577 if (mumps->id.size_schur > 0) { 1578 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1579 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1580 second_solve = PETSC_TRUE; 1581 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1582 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1583 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1584 } 1585 /* solve phase */ 1586 mumps->id.job = JOB_SOLVE; 1587 PetscMUMPS_c(mumps); 1588 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1589 1590 /* handle expansion step of Schur complement (if any) */ 1591 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1592 else if (mumps->id.ICNTL(26) == 1) { 1593 PetscCall(MatMumpsSolveSchur_Private(A)); 1594 for (j = 0; j < nrhs; ++j) 1595 for (i = 0; i < mumps->id.size_schur; ++i) { 1596 #if !defined(PETSC_USE_COMPLEX) 1597 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs]; 1598 #else 1599 PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i; 1600 #endif 1601 array[mumps->id.listvar_schur[i] - 1 + j * M] = val; 1602 } 1603 } 1604 if (!denseB) { /* sparse B */ 1605 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 1606 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1607 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1608 } 1609 PetscCall(MatDenseRestoreArray(X, &array)); 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 /* parallel case: MUMPS requires rhs B to be centralized on the host! 
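For a dense RHS this restriction can be avoided when ICNTL(20)=10 (distributed dense RHS) is usable, see the denseB branch below; for a sparse RHS only the host passes irhs_ptr/irhs_sparse/nz_rhs to MUMPS.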
*/ 1614 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1615 1616 /* create msol_loc to hold mumps local solution */ 1617 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1618 sol_loc_save = (PetscScalar *)mumps->id.sol_loc; 1619 1620 lsol_loc = mumps->id.lsol_loc; 1621 nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */ 1622 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 1623 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 1624 mumps->id.isol_loc = isol_loc; 1625 1626 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 1627 1628 if (denseB) { 1629 if (mumps->ICNTL20 == 10) { 1630 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1631 PetscCall(MatDenseGetArrayRead(B, &rbray)); 1632 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray)); 1633 PetscCall(MatDenseRestoreArrayRead(B, &rbray)); 1634 PetscCall(MatGetLocalSize(B, &m, NULL)); 1635 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi)); 1636 } else { 1637 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1638 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1639 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1640 0, re-arrange B into desired order, which is a local operation. 1641 */ 1642 1643 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1644 /* wrap dense rhs matrix B into a vector v_mpi */ 1645 PetscCall(MatGetLocalSize(B, &m, NULL)); 1646 PetscCall(MatDenseGetArray(B, &bray)); 1647 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1648 PetscCall(MatDenseRestoreArray(B, &bray)); 1649 1650 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1651 if (!mumps->myid) { 1652 PetscInt *idx; 1653 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1654 PetscCall(PetscMalloc1(nrhs * M, &idx)); 1655 PetscCall(MatGetOwnershipRanges(B, &rstart)); 1656 k = 0; 1657 for (proc = 0; proc < mumps->petsc_size; proc++) { 1658 for (j = 0; j < nrhs; j++) { 1659 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 1660 } 1661 } 1662 1663 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq)); 1664 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to)); 1665 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from)); 1666 } else { 1667 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 1668 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 1669 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 1670 } 1671 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 1672 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1673 PetscCall(ISDestroy(&is_to)); 1674 PetscCall(ISDestroy(&is_from)); 1675 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1676 1677 if (!mumps->myid) { /* define rhs on the host */ 1678 PetscCall(VecGetArray(b_seq, &bray)); 1679 mumps->id.rhs = (MumpsScalar *)bray; 1680 PetscCall(VecRestoreArray(b_seq, &bray)); 1681 } 1682 } 1683 } else { /* sparse B */ 1684 b = (Mat_MPIAIJ *)Bt->data; 1685 1686 /* wrap dense X into a vector v_mpi */ 1687 PetscCall(MatGetLocalSize(X, &m, NULL)); 1688 PetscCall(MatDenseGetArray(X, &bray)); 1689 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi)); 1690 PetscCall(MatDenseRestoreArray(X, &bray)); 1691 1692 if (!mumps->myid) { 1693 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 1694 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1695 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 1696 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 1697 mumps->id.rhs_sparse = (MumpsScalar *)aa; 1698 } else { 1699 mumps->id.irhs_ptr = NULL; 1700 mumps->id.irhs_sparse = NULL; 1701 mumps->id.nz_rhs = 0; 1702 mumps->id.rhs_sparse = NULL; 1703 } 1704 } 1705 1706 /* solve phase */ 1707 mumps->id.job = JOB_SOLVE; 1708 PetscMUMPS_c(mumps); 1709 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1710 1711 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1712 PetscCall(MatDenseGetArray(X, &array)); 1713 PetscCall(VecPlaceArray(v_mpi, array)); 1714 1715 /* create scatter scat_sol */ 1716 PetscCall(MatGetOwnershipRanges(X, &rstart)); 1717 /* iidx: index for scatter mumps solution to petsc X */ 1718 1719 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 1720 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 1721 for (i = 0; i < lsol_loc; i++) { 1722 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1723 1724 for (proc = 0; proc < mumps->petsc_size; proc++) { 1725 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 1726 myrstart = rstart[proc]; 1727 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1728 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1729 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 1730 break; 1731 } 1732 } 1733 1734 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 1735 } 1736 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 1737 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 1738 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1739 PetscCall(ISDestroy(&is_from)); 1740 PetscCall(ISDestroy(&is_to)); 1741 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 1742 PetscCall(MatDenseRestoreArray(X, &array)); 1743 1744 /* free spaces */ 1745 mumps->id.sol_loc = (MumpsScalar *)sol_loc_save; 1746 mumps->id.isol_loc = isol_loc_save; 1747 1748 PetscCall(PetscFree2(sol_loc, isol_loc)); 1749 PetscCall(PetscFree(idxx)); 1750 PetscCall(VecDestroy(&msol_loc)); 1751 PetscCall(VecDestroy(&v_mpi)); 1752 if (!denseB) { 1753 if (!mumps->myid) { 1754 b = (Mat_MPIAIJ *)Bt->data; 1755 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 1756 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 1757 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 1758 } 1759 } else { 1760 if (mumps->ICNTL20 == 0) { 1761 PetscCall(VecDestroy(&b_seq)); 1762 PetscCall(VecScatterDestroy(&scat_rhs)); 1763 } 1764 } 1765 PetscCall(VecScatterDestroy(&scat_sol)); 1766 PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? 
mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)))); 1767 PetscFunctionReturn(PETSC_SUCCESS); 1768 } 1769 1770 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 1771 { 1772 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1773 const PetscMUMPSInt value = mumps->id.ICNTL(9); 1774 1775 PetscFunctionBegin; 1776 mumps->id.ICNTL(9) = 0; 1777 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1778 mumps->id.ICNTL(9) = value; 1779 PetscFunctionReturn(PETSC_SUCCESS); 1780 } 1781 1782 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 1783 { 1784 PetscBool flg; 1785 Mat B; 1786 1787 PetscFunctionBegin; 1788 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 1789 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 1790 1791 /* Create B=Bt^T that uses Bt's data structure */ 1792 PetscCall(MatCreateTranspose(Bt, &B)); 1793 1794 PetscCall(MatMatSolve_MUMPS(A, B, X)); 1795 PetscCall(MatDestroy(&B)); 1796 PetscFunctionReturn(PETSC_SUCCESS); 1797 } 1798 1799 #if !defined(PETSC_USE_COMPLEX) 1800 /* 1801 input: 1802 F: numeric factor 1803 output: 1804 nneg: total number of negative pivots 1805 nzero: total number of zero pivots 1806 npos: (global dimension of F) - nneg - nzero 1807 */ 1808 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 1809 { 1810 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 1811 PetscMPIInt size; 1812 1813 PetscFunctionBegin; 1814 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 1815 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1816 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13)); 1817 1818 if (nneg) *nneg = mumps->id.INFOG(12); 1819 if (nzero || npos) { 1820 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection"); 1821 if (nzero) *nzero = mumps->id.INFOG(28); 1822 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1823 } 1824 PetscFunctionReturn(PETSC_SUCCESS); 1825 } 1826 #endif 1827 1828 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 1829 { 1830 PetscInt i, nreqs; 1831 PetscMUMPSInt *irn, *jcn; 1832 PetscMPIInt count; 1833 PetscInt64 totnnz, remain; 1834 const PetscInt osize = mumps->omp_comm_size; 1835 PetscScalar *val; 1836 1837 PetscFunctionBegin; 1838 if (osize > 1) { 1839 if (reuse == MAT_INITIAL_MATRIX) { 1840 /* master first gathers counts of nonzeros to receive */ 1841 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 1842 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 1843 1844 /* Then each computes number of send/recvs */ 1845 if (mumps->is_omp_master) { 1846 /* Start from 1 since self communication is not done in MPI */ 1847 nreqs = 0; 1848 for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1849 } else { 1850 nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 1851 } 1852 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 1853 1854 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1855 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1856 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1857 is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
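For example, a rank contributing nnz = 5e9 entries posts ceil(5e9 / (2^31-1)) = 3 slices per array, i.e. 9 nonblocking sends in total, since irn, jcn and val are sent separately.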
1858 */ 1859 nreqs = 0; /* counter for actual send/recvs */ 1860 if (mumps->is_omp_master) { 1861 for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1862 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 1863 PetscCall(PetscMalloc1(totnnz, &val)); 1864 1865 /* Self communication */ 1866 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 1867 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 1868 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 1869 1870 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1871 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1872 PetscCall(PetscFree(mumps->val_alloc)); 1873 mumps->nnz = totnnz; 1874 mumps->irn = irn; 1875 mumps->jcn = jcn; 1876 mumps->val = mumps->val_alloc = val; 1877 1878 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1879 jcn += mumps->recvcount[0]; 1880 val += mumps->recvcount[0]; 1881 1882 /* Remote communication */ 1883 for (i = 1; i < osize; i++) { 1884 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1885 remain = mumps->recvcount[i] - count; 1886 while (count > 0) { 1887 PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1888 PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1889 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1890 irn += count; 1891 jcn += count; 1892 val += count; 1893 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1894 remain -= count; 1895 } 1896 } 1897 } else { 1898 irn = mumps->irn; 1899 jcn = mumps->jcn; 1900 val = mumps->val; 1901 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1902 remain = mumps->nnz - count; 1903 while (count > 0) { 1904 PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1905 PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1906 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1907 irn += count; 1908 jcn += count; 1909 val += count; 1910 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1911 remain -= count; 1912 } 1913 } 1914 } else { 1915 nreqs = 0; 1916 if (mumps->is_omp_master) { 1917 val = mumps->val + mumps->recvcount[0]; 1918 for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 1919 count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX); 1920 remain = mumps->recvcount[i] - count; 1921 while (count > 0) { 1922 PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1923 val += count; 1924 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1925 remain -= count; 1926 } 1927 } 1928 } else { 1929 val = mumps->val; 1930 count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX); 1931 remain = mumps->nnz - count; 1932 while (count > 0) { 1933 PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 1934 val += count; 1935 count = PetscMin(remain, PETSC_MPI_INT_MAX); 1936 remain -= count; 1937 } 1938 } 1939 } 1940 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 1941 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1942 } 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const 
MatFactorInfo *info) 1947 { 1948 Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data; 1949 PetscBool isMPIAIJ; 1950 1951 PetscFunctionBegin; 1952 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { 1953 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1954 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1955 PetscFunctionReturn(PETSC_SUCCESS); 1956 } 1957 1958 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1959 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 1960 1961 /* numerical factorization phase */ 1962 mumps->id.job = JOB_FACTNUMERIC; 1963 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1964 if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val; 1965 } else { 1966 mumps->id.a_loc = (MumpsScalar *)mumps->val; 1967 } 1968 PetscMUMPS_c(mumps); 1969 if (mumps->id.INFOG(1) < 0) { 1970 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1971 if (mumps->id.INFOG(1) == -10) { 1972 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1973 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1974 } else if (mumps->id.INFOG(1) == -13) { 1975 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1976 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1977 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1978 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1979 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1980 } else { 1981 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1982 F->factorerrortype = MAT_FACTOR_OTHER; 1983 } 1984 } 1985 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16)); 1986 1987 F->assembled = PETSC_TRUE; 1988 1989 if (F->schur) { /* reset Schur status to unfactored */ 1990 #if defined(PETSC_HAVE_CUDA) 1991 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1992 #endif 1993 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1994 mumps->id.ICNTL(19) = 2; 1995 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 1996 } 1997 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 1998 } 1999 2000 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2001 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2002 2003 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2004 if (mumps->petsc_size > 1) { 2005 PetscInt lsol_loc; 2006 PetscScalar *sol_loc; 2007 2008 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ)); 2009 2010 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2011 if (mumps->x_seq) {
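/* buffers created by a previous numeric factorization were sized with the old INFO(23); free them here so they are recreated below with the current local solution length */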
2012 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2013 PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc)); 2014 PetscCall(VecDestroy(&mumps->x_seq)); 2015 } 2016 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2017 PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc)); 2018 mumps->id.lsol_loc = lsol_loc; 2019 mumps->id.sol_loc = (MumpsScalar *)sol_loc; 2020 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq)); 2021 } 2022 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 2023 PetscFunctionReturn(PETSC_SUCCESS); 2024 } 2025 2026 /* Sets MUMPS options from the options database */ 2027 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2028 { 2029 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2030 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2031 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2032 PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1); 2033 MumpsScalar *arr; 2034 2035 PetscFunctionBegin; 2036 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2037 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2038 PetscInt nthreads = 0; 2039 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2040 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2041 2042 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2043 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2044 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2045 2046 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2047 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2048 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2049 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2050 if (mumps->use_petsc_omp_support) { 2051 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2052 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2053 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2054 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2055 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2056 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2057 #endif 2058 } else { 2059 mumps->omp_comm = PETSC_COMM_SELF; 2060 mumps->mumps_comm = mumps->petsc_comm; 2061 mumps->is_omp_master = PETSC_TRUE; 2062 } 2063 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2064 mumps->reqs = NULL; 2065 mumps->tag = 0; 2066 2067 if (mumps->mumps_comm != MPI_COMM_NULL) { 2068 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2069 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
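The non-OpenMP branch below relies on PetscCommGetComm() instead, which is expected to return an inner communicator managed by PETSc rather than duplicating a fresh one for every factor.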
*/ 2070 MPI_Comm comm; 2071 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2072 mumps->mumps_comm = comm; 2073 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2074 } 2075 2076 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2077 mumps->id.job = JOB_INIT; 2078 mumps->id.par = 1; /* host participates in factorization and solve */ 2079 mumps->id.sym = mumps->sym; 2080 2081 size = mumps->id.size_schur; 2082 arr = mumps->id.schur; 2083 listvar_schur = mumps->id.listvar_schur; 2084 PetscMUMPS_c(mumps); 2085 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2086 /* restore cached ICNTL and CNTL values */ 2087 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2088 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl]; 2089 PetscCall(PetscFree(mumps->ICNTL_pre)); 2090 PetscCall(PetscFree(mumps->CNTL_pre)); 2091 2092 if (schur) { 2093 mumps->id.size_schur = size; 2094 mumps->id.schur_lld = size; 2095 mumps->id.schur = arr; 2096 mumps->id.listvar_schur = listvar_schur; 2097 if (mumps->petsc_size > 1) { 2098 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2099 2100 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2101 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2102 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm)); 2103 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2104 } else { 2105 if (F->factortype == MAT_FACTOR_LU) { 2106 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2107 } else { 2108 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2109 } 2110 } 2111 mumps->id.ICNTL(26) = -1; 2112 } 2113 2114 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2115 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2116 */ 2117 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2118 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm)); 2119 2120 mumps->scat_rhs = NULL; 2121 mumps->scat_sol = NULL; 2122 2123 /* set PETSc-MUMPS default options - override MUMPS default */ 2124 mumps->id.ICNTL(3) = 0; 2125 mumps->id.ICNTL(4) = 0; 2126 if (mumps->petsc_size == 1) { 2127 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2128 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2129 } else { 2130 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2131 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2132 } 2133 } 2134 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2135 if (flg) mumps->id.ICNTL(1) = icntl; 2136 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2137 if (flg) mumps->id.ICNTL(2) = icntl; 2138 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2139 if (flg) mumps->id.ICNTL(3) = icntl; 2140 2141 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2142 if (flg) mumps->id.ICNTL(4) = icntl; 2143 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2144 2145 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2146 if (flg) mumps->id.ICNTL(6) = icntl; 2147 2148 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2149 if (flg) { 2150 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2151 mumps->id.ICNTL(7) = icntl; 2152 } 2153 2154 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2155 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2156 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2157 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2158 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2159 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2160 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2161 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2162 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 2163 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2164 if (flg) { 2165 PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled"); 2166 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2167 } 2168 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2169 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2170 PetscCall(MatDestroy(&F->schur)); 2171 PetscCall(MatMumpsResetSchur_Private(mumps)); 2172 } 2173 2174 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2175 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2176 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2177 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2178 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2179 In short, we could not use distributed RHS with MPICH until v4.0b1. 
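Hence the default chosen below: ICNTL(20)=10 (distributed dense RHS) only when MUMPS is at least 5.3.0 and any MPICH in use is new enough, otherwise the centralized setting 0 is kept; -mat_mumps_icntl_20 can still override this choice.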
2180 */ 2181 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 2182 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 2183 #else 2184 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 2185 #endif 2186 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2187 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2188 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2189 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2190 #endif 2191 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2192 2193 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2194 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2195 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2196 if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ } 2197 2198 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2199 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2200 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2201 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2202 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2203 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2204 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2205 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2206 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", 
mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2207 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2208 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2209 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2210 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2211 2212 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL)); 2213 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL)); 2214 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL)); 2215 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL)); 2216 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL)); 2217 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL)); 2218 2219 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2220 2221 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2222 if (ninfo) { 2223 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo); 2224 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2225 mumps->ninfo = ninfo; 2226 for (i = 0; i < ninfo; i++) { 2227 PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]); 2228 mumps->info[i] = info[i]; 2229 } 2230 } 2231 PetscOptionsEnd(); 2232 PetscFunctionReturn(PETSC_SUCCESS); 2233 } 2234 2235 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) 2236 { 2237 PetscFunctionBegin; 2238 if (mumps->id.INFOG(1) < 0) { 2239 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2240 if (mumps->id.INFOG(1) == -6) { 2241 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2242 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2243 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2244 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2245 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2246 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { 2247 PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n")); 2248 } else { 2249 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d,
INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2250 F->factorerrortype = MAT_FACTOR_OTHER; 2251 } 2252 } 2253 PetscFunctionReturn(PETSC_SUCCESS); 2254 } 2255 2256 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2257 { 2258 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2259 Vec b; 2260 const PetscInt M = A->rmap->N; 2261 2262 PetscFunctionBegin; 2263 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2264 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2265 PetscFunctionReturn(PETSC_SUCCESS); 2266 } 2267 2268 /* Set MUMPS options from the options database */ 2269 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2270 2271 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2272 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2273 2274 /* analysis phase */ 2275 mumps->id.job = JOB_FACTSYMBOLIC; 2276 mumps->id.n = M; 2277 switch (mumps->id.ICNTL(18)) { 2278 case 0: /* centralized assembled matrix input */ 2279 if (!mumps->myid) { 2280 mumps->id.nnz = mumps->nnz; 2281 mumps->id.irn = mumps->irn; 2282 mumps->id.jcn = mumps->jcn; 2283 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2284 if (r) { 2285 mumps->id.ICNTL(7) = 1; 2286 if (!mumps->myid) { 2287 const PetscInt *idx; 2288 PetscInt i; 2289 2290 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2291 PetscCall(ISGetIndices(r, &idx)); 2292 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2293 PetscCall(ISRestoreIndices(r, &idx)); 2294 } 2295 } 2296 } 2297 break; 2298 case 3: /* distributed assembled matrix input (size>1) */ 2299 mumps->id.nnz_loc = mumps->nnz; 2300 mumps->id.irn_loc = mumps->irn; 2301 mumps->id.jcn_loc = mumps->jcn; 2302 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2303 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2304 PetscCall(MatCreateVecs(A, NULL, &b)); 2305 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2306 PetscCall(VecDestroy(&b)); 2307 } 2308 break; 2309 } 2310 PetscMUMPS_c(mumps); 2311 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2312 2313 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2314 F->ops->solve = MatSolve_MUMPS; 2315 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2316 F->ops->matsolve = MatMatSolve_MUMPS; 2317 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2318 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2319 2320 mumps->matstruc = SAME_NONZERO_PATTERN; 2321 PetscFunctionReturn(PETSC_SUCCESS); 2322 } 2323 2324 /* Note the Petsc r and c permutations are ignored */ 2325 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 2326 { 2327 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2328 Vec b; 2329 const PetscInt M = A->rmap->N; 2330 2331 PetscFunctionBegin; 2332 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2333 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2334 PetscFunctionReturn(PETSC_SUCCESS); 2335 } 2336 2337 /* Set MUMPS options from the options database */ 2338 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2339 2340 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2341 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2342 2343 /* analysis phase */ 2344 mumps->id.job = JOB_FACTSYMBOLIC; 2345 mumps->id.n = M; 2346 switch (mumps->id.ICNTL(18)) { 2347 case 0: /* centralized assembled matrix input */ 2348 if (!mumps->myid) { 2349 mumps->id.nnz = mumps->nnz; 2350 mumps->id.irn = mumps->irn; 2351 mumps->id.jcn = mumps->jcn; 2352 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2353 } 2354 break; 2355 case 3: /* distributed assembled matrix input (size>1) */ 2356 mumps->id.nnz_loc = mumps->nnz; 2357 mumps->id.irn_loc = mumps->irn; 2358 mumps->id.jcn_loc = mumps->jcn; 2359 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2360 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2361 PetscCall(MatCreateVecs(A, NULL, &b)); 2362 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2363 PetscCall(VecDestroy(&b)); 2364 } 2365 break; 2366 } 2367 PetscMUMPS_c(mumps); 2368 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2369 2370 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2371 F->ops->solve = MatSolve_MUMPS; 2372 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2373 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2374 2375 mumps->matstruc = SAME_NONZERO_PATTERN; 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378 2379 /* Note the Petsc r permutation and factor info are ignored */ 2380 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) 2381 { 2382 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2383 Vec b; 2384 const PetscInt M = A->rmap->N; 2385 2386 PetscFunctionBegin; 2387 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2388 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 2389 PetscFunctionReturn(PETSC_SUCCESS); 2390 } 2391 2392 /* Set MUMPS options from the options database */ 2393 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2394 2395 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2396 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2397 2398 /* analysis phase */ 2399 mumps->id.job = JOB_FACTSYMBOLIC; 2400 mumps->id.n = M; 2401 switch (mumps->id.ICNTL(18)) { 2402 case 0: /* centralized assembled matrix input */ 2403 if (!mumps->myid) { 2404 mumps->id.nnz = mumps->nnz; 2405 mumps->id.irn = mumps->irn; 2406 mumps->id.jcn = mumps->jcn; 2407 if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val; 2408 } 2409 break; 2410 case 3: /* distributed assembled matrix input (size>1) */ 2411 mumps->id.nnz_loc = mumps->nnz; 2412 mumps->id.irn_loc = mumps->irn; 2413 mumps->id.jcn_loc = mumps->jcn; 2414 if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val; 2415 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2416 PetscCall(MatCreateVecs(A, NULL, &b)); 2417 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2418 PetscCall(VecDestroy(&b)); 2419 } 2420 break; 2421 } 2422 PetscMUMPS_c(mumps); 2423 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2424 2425 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2426 F->ops->solve = MatSolve_MUMPS; 2427 F->ops->solvetranspose = MatSolve_MUMPS; 2428 F->ops->matsolve = MatMatSolve_MUMPS; 2429 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2430 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2431 #if defined(PETSC_USE_COMPLEX) 2432 F->ops->getinertia = NULL; 2433 #else 2434 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2435 #endif 2436 2437 mumps->matstruc = SAME_NONZERO_PATTERN; 2438 PetscFunctionReturn(PETSC_SUCCESS); 2439 } 2440 2441 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 2442 { 2443 PetscBool iascii; 2444 PetscViewerFormat format; 2445 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2446 2447 PetscFunctionBegin; 2448 /* check if matrix is mumps type */ 2449 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 2450 2451 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2452 if (iascii) { 2453 PetscCall(PetscViewerGetFormat(viewer, &format)); 2454 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2455 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 2456 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2457 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 2458 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 2459 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 2460 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 2461 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 2462 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 2463 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 2464 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 2465 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 2466 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 2467 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 2468 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 2469 if (mumps->id.ICNTL(11) > 0) { 2470 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4))); 2471 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5))); 2472 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6))); 2473 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8))); 2474 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9))); 2475 
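/* per the MUMPS users' guide, the condition-number estimates printed next are only computed when the expensive error analysis is requested (ICNTL(11)=1) */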
PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11))); 2476 } 2477 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 2478 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 2479 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 2480 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 2481 /* ICNTL(15-17) not used */ 2482 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 2483 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 2484 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 2485 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 2486 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 2487 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 2488 2489 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 2490 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 2491 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 2492 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 2493 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 2494 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 2495 2496 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 2497 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 2498 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 2499 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 2500 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 2501 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 2502 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 2503 2504 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1))); 2505 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2))); 2506 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3))); 2507 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4))); 2508 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", 
mumps->id.CNTL(5))); 2509 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7))); 2510 2511 /* information local to each processor */ 2512 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 2513 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2514 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1))); 2515 PetscCall(PetscViewerFlush(viewer)); 2516 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 2517 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2))); 2518 PetscCall(PetscViewerFlush(viewer)); 2519 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 2520 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3))); 2521 PetscCall(PetscViewerFlush(viewer)); 2522 2523 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 2524 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 2525 PetscCall(PetscViewerFlush(viewer)); 2526 2527 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 2528 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 2529 PetscCall(PetscViewerFlush(viewer)); 2530 2531 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 2532 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 2533 PetscCall(PetscViewerFlush(viewer)); 2534 2535 if (mumps->ninfo && mumps->ninfo <= 80) { 2536 PetscInt i; 2537 for (i = 0; i < mumps->ninfo; i++) { 2538 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 2539 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 2540 PetscCall(PetscViewerFlush(viewer)); 2541 } 2542 } 2543 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2544 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? 
((PetscObject)A)->prefix : "")); 2545 2546 if (mumps->myid == 0) { /* information from the host */ 2547 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1))); 2548 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2))); 2549 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3))); 2550 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34))); 2551 2552 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 2553 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 2554 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 2555 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 2556 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 2557 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 2558 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 2559 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 2560 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 2561 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 2562 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 2563 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14))); 2564 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 2565 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 2566 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 2567 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 2568 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 2569 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2570 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 2571 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 2572 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 2573 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 2574 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 2575 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 2576 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 2577 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 2578 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 2579 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 2580 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 2581 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 2582 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 2583 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 2584 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 2585 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 2586 } 2587 } 2588 } 2589 PetscFunctionReturn(PETSC_SUCCESS); 2590 } 2591 2592 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) 2593 { 2594 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2595 2596 PetscFunctionBegin; 2597 info->block_size = 1.0; 2598 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2599 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 2600 info->nz_unneeded = 0.0; 2601 info->assemblies = 0.0; 2602 info->mallocs = 0.0; 2603 info->memory = 0.0; 2604 info->fill_ratio_given = 0; 2605 info->fill_ratio_needed = 0; 2606 info->factor_mallocs = 0; 2607 PetscFunctionReturn(PETSC_SUCCESS); 2608 } 2609 2610 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2611 { 2612 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2613 const PetscScalar *arr; 2614 const PetscInt *idxs; 2615 PetscInt size, i; 2616 2617 PetscFunctionBegin; 2618 PetscCall(ISGetLocalSize(is, &size)); 2619 /* Schur complement matrix */ 2620 PetscCall(MatDestroy(&F->schur)); 2621 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 2622 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 2623 mumps->id.schur = (MumpsScalar *)arr; 2624 mumps->id.size_schur = size; 2625 mumps->id.schur_lld = size; 2626 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 2627 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 2628 2629 /* MUMPS expects Fortran style indices */ 2630 PetscCall(PetscFree(mumps->id.listvar_schur)); 2631 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 2632 PetscCall(ISGetIndices(is, &idxs)); 2633 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]))); 2634 PetscCall(ISRestoreIndices(is, &idxs)); 2635 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2636 mumps->id.ICNTL(26) = -1; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 2641 { 2642 Mat St; 2643 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2644 PetscScalar *array; 2645 2646 PetscFunctionBegin; 2647 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected!
Call MatFactorSetSchurIS() to enable it"); 2648 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 2649 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 2650 PetscCall(MatSetType(St, MATDENSE)); 2651 PetscCall(MatSetUp(St)); 2652 PetscCall(MatDenseGetArray(St, &array)); 2653 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2654 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2655 PetscInt i, j, N = mumps->id.size_schur; 2656 for (i = 0; i < N; i++) { 2657 for (j = 0; j < N; j++) { 2658 #if !defined(PETSC_USE_COMPLEX) 2659 PetscScalar val = mumps->id.schur[i * N + j]; 2660 #else 2661 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2662 #endif 2663 array[j * N + i] = val; 2664 } 2665 } 2666 } else { /* stored by columns */ 2667 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2668 } 2669 } else { /* either full or lower-triangular (not packed) */ 2670 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2671 PetscInt i, j, N = mumps->id.size_schur; 2672 for (i = 0; i < N; i++) { 2673 for (j = i; j < N; j++) { 2674 #if !defined(PETSC_USE_COMPLEX) 2675 PetscScalar val = mumps->id.schur[i * N + j]; 2676 #else 2677 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2678 #endif 2679 array[i * N + j] = array[j * N + i] = val; 2680 } 2681 } 2682 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2683 PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur)); 2684 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2685 PetscInt i, j, N = mumps->id.size_schur; 2686 for (i = 0; i < N; i++) { 2687 for (j = 0; j < i + 1; j++) { 2688 #if !defined(PETSC_USE_COMPLEX) 2689 PetscScalar val = mumps->id.schur[i * N + j]; 2690 #else 2691 PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i; 2692 #endif 2693 array[i * N + j] = array[j * N + i] = val; 2694 } 2695 } 2696 } 2697 } 2698 PetscCall(MatDenseRestoreArray(St, &array)); 2699 *S = St; 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 2704 { 2705 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2706 2707 PetscFunctionBegin; 2708 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2709 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2710 for (i = 0; i < nICNTL_pre; ++i) 2711 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 2712 if (i == nICNTL_pre) { /* not already cached */ 2713 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 2714 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 2715 mumps->ICNTL_pre[0]++; 2716 } 2717 mumps->ICNTL_pre[1 + 2 * i] = icntl; 2718 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 2719 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 2720 PetscFunctionReturn(PETSC_SUCCESS); 2721 } 2722 2723 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 2724 { 2725 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2726 2727 PetscFunctionBegin; 2728 if (mumps->id.job == JOB_NULL) { 2729 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ?
mumps->ICNTL_pre[0] : 0; 2730 *ival = 0; 2731 for (i = 0; i < nICNTL_pre; ++i) { 2732 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 2733 } 2734 } else *ival = mumps->id.ICNTL(icntl); 2735 PetscFunctionReturn(PETSC_SUCCESS); 2736 } 2737 2738 /*@ 2739 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2740 2741 Logically Collective 2742 2743 Input Parameters: 2744 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2745 . icntl - index of MUMPS parameter array ICNTL() 2746 - ival - value of MUMPS ICNTL(icntl) 2747 2748 Options Database Key: 2749 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2750 2751 Level: beginner 2752 2753 References: 2754 . * - MUMPS Users' Guide 2755 2756 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2757 @*/ 2758 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 2759 { 2760 PetscFunctionBegin; 2761 PetscValidType(F, 1); 2762 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2763 PetscValidLogicalCollectiveInt(F, icntl, 2); 2764 PetscValidLogicalCollectiveInt(F, ival, 3); 2765 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2766 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 2767 PetscFunctionReturn(PETSC_SUCCESS); 2768 } 2769 2770 /*@ 2771 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2772 2773 Logically Collective 2774 2775 Input Parameters: 2776 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2777 - icntl - index of MUMPS parameter array ICNTL() 2778 2779 Output Parameter: 2780 . ival - value of MUMPS ICNTL(icntl) 2781 2782 Level: beginner 2783 2784 References: 2785 . * - MUMPS Users' Guide 2786 2787 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2788 @*/ 2789 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 2790 { 2791 PetscFunctionBegin; 2792 PetscValidType(F, 1); 2793 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2794 PetscValidLogicalCollectiveInt(F, icntl, 2); 2795 PetscAssertPointer(ival, 3); 2796 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 2797 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 2798 PetscFunctionReturn(PETSC_SUCCESS); 2799 } 2800 2801 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 2802 { 2803 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2804 2805 PetscFunctionBegin; 2806 if (mumps->id.job == JOB_NULL) { 2807 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? 
mumps->CNTL_pre[0] : 0; 2808 for (i = 0; i < nCNTL_pre; ++i) 2809 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 2810 if (i == nCNTL_pre) { 2811 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 2812 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 2813 mumps->CNTL_pre[0]++; 2814 } 2815 mumps->CNTL_pre[1 + 2 * i] = icntl; 2816 mumps->CNTL_pre[2 + 2 * i] = val; 2817 } else mumps->id.CNTL(icntl) = val; 2818 PetscFunctionReturn(PETSC_SUCCESS); 2819 } 2820 2821 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 2822 { 2823 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2824 2825 PetscFunctionBegin; 2826 if (mumps->id.job == JOB_NULL) { 2827 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2828 *val = 0.0; 2829 for (i = 0; i < nCNTL_pre; ++i) { 2830 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 2831 } 2832 } else *val = mumps->id.CNTL(icntl); 2833 PetscFunctionReturn(PETSC_SUCCESS); 2834 } 2835 2836 /*@ 2837 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2838 2839 Logically Collective 2840 2841 Input Parameters: 2842 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2843 . icntl - index of MUMPS parameter array CNTL() 2844 - val - value of MUMPS CNTL(icntl) 2845 2846 Options Database Key: 2847 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2848 2849 Level: beginner 2850 2851 References: 2852 . * - MUMPS Users' Guide 2853 2854 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2855 @*/ 2856 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 2857 { 2858 PetscFunctionBegin; 2859 PetscValidType(F, 1); 2860 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2861 PetscValidLogicalCollectiveInt(F, icntl, 2); 2862 PetscValidLogicalCollectiveReal(F, val, 3); 2863 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2864 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 2865 PetscFunctionReturn(PETSC_SUCCESS); 2866 } 2867 2868 /*@ 2869 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2870 2871 Logically Collective 2872 2873 Input Parameters: 2874 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 2875 - icntl - index of MUMPS parameter array CNTL() 2876 2877 Output Parameter: 2878 . val - value of MUMPS CNTL(icntl) 2879 2880 Level: beginner 2881 2882 References: 2883 . 
* - MUMPS Users' Guide 2884 2885 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2886 @*/ 2887 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 2888 { 2889 PetscFunctionBegin; 2890 PetscValidType(F, 1); 2891 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 2892 PetscValidLogicalCollectiveInt(F, icntl, 2); 2893 PetscAssertPointer(val, 3); 2894 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 2895 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 2896 PetscFunctionReturn(PETSC_SUCCESS); 2897 } 2898 2899 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 2900 { 2901 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2902 2903 PetscFunctionBegin; 2904 *info = mumps->id.INFO(icntl); 2905 PetscFunctionReturn(PETSC_SUCCESS); 2906 } 2907 2908 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 2909 { 2910 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2911 2912 PetscFunctionBegin; 2913 *infog = mumps->id.INFOG(icntl); 2914 PetscFunctionReturn(PETSC_SUCCESS); 2915 } 2916 2917 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 2918 { 2919 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2920 2921 PetscFunctionBegin; 2922 *rinfo = mumps->id.RINFO(icntl); 2923 PetscFunctionReturn(PETSC_SUCCESS); 2924 } 2925 2926 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 2927 { 2928 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2929 2930 PetscFunctionBegin; 2931 *rinfog = mumps->id.RINFOG(icntl); 2932 PetscFunctionReturn(PETSC_SUCCESS); 2933 } 2934 2935 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 2936 { 2937 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2938 2939 PetscFunctionBegin; 2940 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2941 *size = 0; 2942 *array = NULL; 2943 if (!mumps->myid) { 2944 *size = mumps->id.INFOG(28); 2945 PetscCall(PetscMalloc1(*size, array)); 2946 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 2947 } 2948 PetscFunctionReturn(PETSC_SUCCESS); 2949 } 2950 2951 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 2952 { 2953 Mat Bt = NULL, Btseq = NULL; 2954 PetscBool flg; 2955 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2956 PetscScalar *aa; 2957 PetscInt spnr, *ia, *ja, M, nrhs; 2958 2959 PetscFunctionBegin; 2960 PetscAssertPointer(spRHS, 2); 2961 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 2962 if (flg) { 2963 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 2964 } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 2965 2966 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 2967 2968 if (mumps->petsc_size > 1) { 2969 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 2970 Btseq = b->A; 2971 } else { 2972 Btseq = Bt; 2973 } 2974 2975 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 2976 mumps->id.nrhs = nrhs; 2977 mumps->id.lrhs = M; 2978 mumps->id.rhs = NULL; 2979 2980 if (!mumps->myid) { 2981 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 2982 
PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2983 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2984 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2985 mumps->id.rhs_sparse = (MumpsScalar *)aa; 2986 } else { 2987 mumps->id.irhs_ptr = NULL; 2988 mumps->id.irhs_sparse = NULL; 2989 mumps->id.nz_rhs = 0; 2990 mumps->id.rhs_sparse = NULL; 2991 } 2992 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2993 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2994 2995 /* solve phase */ 2996 mumps->id.job = JOB_SOLVE; 2997 PetscMUMPS_c(mumps); 2998 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2999 3000 if (!mumps->myid) { 3001 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3002 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3003 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3004 } 3005 PetscFunctionReturn(PETSC_SUCCESS); 3006 } 3007 3008 /*@ 3009 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` 3010 3011 Logically Collective 3012 3013 Input Parameter: 3014 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3015 3016 Output Parameter: 3017 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3018 3019 Level: beginner 3020 3021 References: 3022 . * - MUMPS Users' Guide 3023 3024 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3025 @*/ 3026 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3027 { 3028 PetscFunctionBegin; 3029 PetscValidType(F, 1); 3030 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3031 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3032 PetscFunctionReturn(PETSC_SUCCESS); 3033 } 3034 3035 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3036 { 3037 Mat spRHS; 3038 3039 PetscFunctionBegin; 3040 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3041 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3042 PetscCall(MatDestroy(&spRHS)); 3043 PetscFunctionReturn(PETSC_SUCCESS); 3044 } 3045 3046 /*@ 3047 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T 3048 3049 Logically Collective 3050 3051 Input Parameter: 3052 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3053 3054 Output Parameter: 3055 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3056 3057 Level: beginner 3058 3059 References: 3060 . 
* - MUMPS Users' Guide 3061 3062 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3063 @*/ 3064 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3065 { 3066 PetscBool flg; 3067 3068 PetscFunctionBegin; 3069 PetscValidType(F, 1); 3070 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3071 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3072 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3073 3074 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3075 PetscFunctionReturn(PETSC_SUCCESS); 3076 } 3077 3078 /*@ 3079 MatMumpsGetInfo - Get MUMPS parameter INFO() 3080 3081 Logically Collective 3082 3083 Input Parameters: 3084 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3085 - icntl - index of MUMPS parameter array INFO() 3086 3087 Output Parameter: 3088 . ival - value of MUMPS INFO(icntl) 3089 3090 Level: beginner 3091 3092 References: 3093 . * - MUMPS Users' Guide 3094 3095 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3096 @*/ 3097 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3098 { 3099 PetscFunctionBegin; 3100 PetscValidType(F, 1); 3101 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3102 PetscAssertPointer(ival, 3); 3103 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3104 PetscFunctionReturn(PETSC_SUCCESS); 3105 } 3106 3107 /*@ 3108 MatMumpsGetInfog - Get MUMPS parameter INFOG() 3109 3110 Logically Collective 3111 3112 Input Parameters: 3113 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3114 - icntl - index of MUMPS parameter array INFOG() 3115 3116 Output Parameter: 3117 . ival - value of MUMPS INFOG(icntl) 3118 3119 Level: beginner 3120 3121 References: 3122 . * - MUMPS Users' Guide 3123 3124 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3125 @*/ 3126 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3127 { 3128 PetscFunctionBegin; 3129 PetscValidType(F, 1); 3130 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3131 PetscAssertPointer(ival, 3); 3132 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3133 PetscFunctionReturn(PETSC_SUCCESS); 3134 } 3135 3136 /*@ 3137 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 3138 3139 Logically Collective 3140 3141 Input Parameters: 3142 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3143 - icntl - index of MUMPS parameter array RINFO() 3144 3145 Output Parameter: 3146 . val - value of MUMPS RINFO(icntl) 3147 3148 Level: beginner 3149 3150 References: 3151 . 
* - MUMPS Users' Guide 3152 3153 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3154 @*/ 3155 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3156 { 3157 PetscFunctionBegin; 3158 PetscValidType(F, 1); 3159 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3160 PetscAssertPointer(val, 3); 3161 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3162 PetscFunctionReturn(PETSC_SUCCESS); 3163 } 3164 3165 /*@ 3166 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 3167 3168 Logically Collective 3169 3170 Input Parameters: 3171 + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3172 - icntl - index of MUMPS parameter array RINFOG() 3173 3174 Output Parameter: 3175 . val - value of MUMPS RINFOG(icntl) 3176 3177 Level: beginner 3178 3179 References: 3180 . * - MUMPS Users' Guide 3181 3182 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3183 @*/ 3184 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3185 { 3186 PetscFunctionBegin; 3187 PetscValidType(F, 1); 3188 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3189 PetscAssertPointer(val, 3); 3190 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3191 PetscFunctionReturn(PETSC_SUCCESS); 3192 } 3193 3194 /*@ 3195 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() 3196 3197 Logically Collective 3198 3199 Input Parameter: 3200 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface 3201 3202 Output Parameters: 3203 + size - local size of the array. The size of the array is non-zero only on the host. 3204 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3205 for freeing this array. 3206 3207 Level: beginner 3208 3209 References: 3210 . * - MUMPS Users' Guide 3211 3212 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3213 @*/ 3214 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3215 { 3216 PetscFunctionBegin; 3217 PetscValidType(F, 1); 3218 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3219 PetscAssertPointer(size, 2); 3220 PetscAssertPointer(array, 3); 3221 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3222 PetscFunctionReturn(PETSC_SUCCESS); 3223 } 3224 3225 /*MC 3226 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3227 distributed and sequential matrices via the external package MUMPS. 
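  A minimal usage sketch (the ICNTL value below is illustrative only, not a recommendation):
.vb
   // from the options database, inside a KSP/PC solve:
   //   -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30
   // or programmatically, on a factor obtained with MatGetFactor():
   MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
   MatMumpsSetIcntl(F, 14, 30); // ICNTL(14): percentage increase in the estimated working space
.ve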
3228 3229 Works with `MATAIJ` and `MATSBAIJ` matrices 3230 3231 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3232 3233 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3234 See details below. 3235 3236 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3237 3238 Options Database Keys: 3239 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3240 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3241 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3242 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3243 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3244 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3245 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3246 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3247 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3248 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3249 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3250 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3251 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3252 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3253 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3254 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3255 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3256 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 3257 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3258 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3259 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3260 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 3261 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3262 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3263 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3264 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3265 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3266 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3267 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3268 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3269 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3270 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3271 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3272 .
-mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3273 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3274 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3275 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3276 By default m is the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual. 3277 3278 Level: beginner 3279 3280 Notes: 3281 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will 3282 error if the matrix is Hermitian. 3283 3284 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3285 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3286 3287 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3288 the failure with 3289 .vb 3290 KSPGetPC(ksp,&pc); 3291 PCFactorGetMatrix(pc,&mat); 3292 MatMumpsGetInfo(mat,....); 3293 MatMumpsGetInfog(mat,....); etc. 3294 .ve 3295 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3296 3297 MUMPS provides 64-bit integer support in two build modes: 3298 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3299 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3300 3301 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3302 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 3303 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3304 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3305 3306 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3307 3308 Two modes to run MUMPS/PETSc with OpenMP: 3309 .vb 3310 Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3311 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 3312 .ve 3313 3314 .vb 3315 -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 3316 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3317 .ve 3318 3319 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3320 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3321 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available).
Since MUMPS calls BLAS 3322 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3323 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 3324 3325 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3326 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3327 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3328 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (that assignment might be set 3329 by CPU binding policies in job scripts) and make those CPUs available to the master so that OMP threads spawned by MUMPS can run on them. 3330 In a multi-socket compute node, MPI rank mapping is an issue. Using the above example again, suppose your compute node has two sockets. 3331 If you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3332 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the 3333 cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the 3334 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 3335 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 3336 examine the mapping result. 3337 3338 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3339 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3340 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3341 3342 References: 3343 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3344 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3345 3346 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3347 M*/ 3348 3349 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) 3350 { 3351 PetscFunctionBegin; 3352 *type = MATSOLVERMUMPS; 3353 PetscFunctionReturn(PETSC_SUCCESS); 3354 } 3355 3356 /* MatGetFactor for Seq and MPI AIJ matrices */ 3357 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3358 { 3359 Mat B; 3360 Mat_MUMPS *mumps; 3361 PetscBool isSeqAIJ, isDiag; 3362 PetscMPIInt size; 3363 3364 PetscFunctionBegin; 3365 #if defined(PETSC_USE_COMPLEX) 3366 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3367 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 3368 *F = NULL; 3369 PetscFunctionReturn(PETSC_SUCCESS); 3370 } 3371 #endif 3372 /* Create the factorization matrix */ 3373 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 3374 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 3375 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3376 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3377 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3378 PetscCall(MatSetUp(B)); 3379 3380 PetscCall(PetscNew(&mumps)); 3381 3382 B->ops->view = MatView_MUMPS; 3383 B->ops->getinfo = MatGetInfo_MUMPS; 3384 3385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3391 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3393 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3394 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3395 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3396 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3397 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3398 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3399 3400 if (ftype == MAT_FACTOR_LU) { 3401 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3402 B->factortype = MAT_FACTOR_LU; 3403 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3404 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 
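/* matrices that are neither SEQAIJ nor MATDIAGONAL (typically MPIAIJ) fall through to the distributed conversion below */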
3405 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3406 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3407 mumps->sym = 0; 3408 } else { 3409 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3410 B->factortype = MAT_FACTOR_CHOLESKY; 3411 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3412 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 3413 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3414 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3415 #if defined(PETSC_USE_COMPLEX) 3416 mumps->sym = 2; 3417 #else 3418 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3419 else mumps->sym = 2; 3420 #endif 3421 } 3422 3423 /* set solvertype */ 3424 PetscCall(PetscFree(B->solvertype)); 3425 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3426 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3427 if (size == 1) { 3428 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3429 B->canuseordering = PETSC_TRUE; 3430 } 3431 B->ops->destroy = MatDestroy_MUMPS; 3432 B->data = (void *)mumps; 3433 3434 *F = B; 3435 mumps->id.job = JOB_NULL; 3436 mumps->ICNTL_pre = NULL; 3437 mumps->CNTL_pre = NULL; 3438 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3439 PetscFunctionReturn(PETSC_SUCCESS); 3440 } 3441 3442 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3443 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) 3444 { 3445 Mat B; 3446 Mat_MUMPS *mumps; 3447 PetscBool isSeqSBAIJ; 3448 PetscMPIInt size; 3449 3450 PetscFunctionBegin; 3451 #if defined(PETSC_USE_COMPLEX) 3452 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3453 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 3454 *F = NULL; 3455 PetscFunctionReturn(PETSC_SUCCESS); 3456 } 3457 #endif 3458 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3459 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3460 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3461 PetscCall(MatSetUp(B)); 3462 3463 PetscCall(PetscNew(&mumps)); 3464 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 3465 if (isSeqSBAIJ) { 3466 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3467 } else { 3468 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3469 } 3470 3471 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3472 B->ops->view = MatView_MUMPS; 3473 B->ops->getinfo = MatGetInfo_MUMPS; 3474 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3476 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3478 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3489 3490 B->factortype = MAT_FACTOR_CHOLESKY; 3491 #if defined(PETSC_USE_COMPLEX) 3492 mumps->sym = 2; 3493 #else 3494 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3495 else mumps->sym = 2; 3496 #endif 3497 3498 /* set solvertype */ 3499 PetscCall(PetscFree(B->solvertype)); 3500 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3501 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3502 if (size == 1) { 3503 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3504 B->canuseordering = PETSC_TRUE; 3505 } 3506 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3507 B->ops->destroy = MatDestroy_MUMPS; 3508 B->data = (void *)mumps; 3509 3510 *F = B; 3511 mumps->id.job = JOB_NULL; 3512 mumps->ICNTL_pre = NULL; 3513 mumps->CNTL_pre = NULL; 3514 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3515 PetscFunctionReturn(PETSC_SUCCESS); 3516 } 3517 3518 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 3519 { 3520 Mat B; 3521 Mat_MUMPS *mumps; 3522 PetscBool isSeqBAIJ; 3523 PetscMPIInt size; 
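/* For BAIJ input only LU is supported: the blocked storage is expanded into AIJ-style triples for MUMPS, while Cholesky requires an SBAIJ or AIJ matrix instead (see the check below) */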
3524 3525 PetscFunctionBegin; 3526 /* Create the factorization matrix */ 3527 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 3528 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3529 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3530 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3531 PetscCall(MatSetUp(B)); 3532 3533 PetscCall(PetscNew(&mumps)); 3534 if (ftype == MAT_FACTOR_LU) { 3535 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3536 B->factortype = MAT_FACTOR_LU; 3537 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3538 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3539 mumps->sym = 0; 3540 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3541 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3542 3543 B->ops->view = MatView_MUMPS; 3544 B->ops->getinfo = MatGetInfo_MUMPS; 3545 3546 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3547 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3548 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3549 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3550 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3551 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3552 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3553 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3554 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3555 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3556 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3557 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3558 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 3559 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 3560 3561 /* set solvertype */ 3562 PetscCall(PetscFree(B->solvertype)); 3563 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3564 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3565 if (size == 1) { 3566 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3567 B->canuseordering = PETSC_TRUE; 3568 } 3569 B->ops->destroy = MatDestroy_MUMPS; 3570 B->data = (void *)mumps; 3571 3572 *F = B; 3573 mumps->id.job = JOB_NULL; 3574 mumps->ICNTL_pre = NULL; 3575 mumps->CNTL_pre = NULL; 3576 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3577 PetscFunctionReturn(PETSC_SUCCESS); 3578 } 3579 3580 /* MatGetFactor for Seq and MPI SELL matrices */ 3581 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 3582 { 3583 Mat B; 3584 Mat_MUMPS *mumps; 3585 PetscBool isSeqSELL; 3586 PetscMPIInt size; 3587 
3588 PetscFunctionBegin; 3589 /* Create the factorization matrix */ 3590 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 3591 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3592 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3593 PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name)); 3594 PetscCall(MatSetUp(B)); 3595 3596 PetscCall(PetscNew(&mumps)); 3597 3598 B->ops->view = MatView_MUMPS; 3599 B->ops->getinfo = MatGetInfo_MUMPS; 3600 3601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 3602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 3603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 3604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 3605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 3606 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 3607 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 3608 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 3609 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 3610 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 3611 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 3612 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 3613 3614 if (ftype == MAT_FACTOR_LU) { 3615 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3616 B->factortype = MAT_FACTOR_LU; 3617 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3618 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3619 mumps->sym = 0; 3620 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 3621 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 3622 3623 /* set solvertype */ 3624 PetscCall(PetscFree(B->solvertype)); 3625 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 3626 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 3627 if (size == 1) { 3628 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3629 B->canuseordering = PETSC_TRUE; 3630 } 3631 B->ops->destroy = MatDestroy_MUMPS; 3632 B->data = (void *)mumps; 3633 3634 *F = B; 3635 mumps->id.job = JOB_NULL; 3636 mumps->ICNTL_pre = NULL; 3637 mumps->CNTL_pre = NULL; 3638 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3639 PetscFunctionReturn(PETSC_SUCCESS); 3640 } 3641 3642 /* MatGetFactor for MATNEST matrices */ 3643 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 3644 { 3645 Mat B, **mats; 3646 Mat_MUMPS *mumps; 3647 PetscInt nr, nc; 3648 PetscMPIInt size; 3649 PetscBool flg = PETSC_TRUE; 3650 3651 PetscFunctionBegin; 3652 #if defined(PETSC_USE_COMPLEX) 3653 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 3654 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not 
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
      if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
      else {
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
      }
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag) {
            PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
          PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
        PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
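
/*
  A minimal sketch of tuning MUMPS through the interfaces composed on the factor matrices above.
  Assumptions (not taken from this file): F is a factor matrix previously obtained with
  MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F), and the numerical values are purely illustrative.

     PetscCall(MatMumpsSetIcntl(F, 14, 50));  // ICNTL(14): percentage increase of the estimated working space
     PetscCall(MatMumpsSetCntl(F, 1, 0.01));  // CNTL(1): relative threshold for numerical pivoting

  The same controls are reachable from the command line, e.g. -mat_mumps_icntl_14 50 -mat_mumps_cntl_1 0.01.
*/
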
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
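
/*
  With these registrations in place, and assuming PETSc was configured with MUMPS, the solver can be
  selected at runtime without code changes; a typical (illustrative) option set for a direct solve is

     -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps

  or, for a symmetric problem,

     -ksp_type preonly -pc_type cholesky -pc_factor_mat_solver_type mumps
*/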