1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 55 #endif 56 #else 57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #endif 61 62 #define MPIU_MUMPSINT MPI_INT 63 #define PETSC_MUMPS_INT_MAX 2147483647 64 #define PETSC_MUMPS_INT_MIN -2147483648 65 66 /* Cast PetscInt to PetscMUMPSInt. 
Usually there is no overflow since <a> is row/col indices or some small integers*/
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* The range check only exists for 64-bit-index builds (with 32-bit PetscInt the cast cannot
     truncate) and, via PetscUnlikelyDebug(), only in debug mode, to keep release builds cheap */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */

/* Read a PetscMUMPSInt option from the options database.

   Mirrors PetscOptionsInt(): <currentvalue> is shown as the default, the result (if given by the
   user) is stored in <value> after a range-checked narrowing cast, and <set> (may be NULL) reports
   whether the option was present. [lb,ub] bound the accepted value. */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
/* Convenience wrapper; expects a PetscOptionsObject in scope (i.e. use inside PetscOptionsBegin/End) */
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
/* Invoke MUMPS on behalf of all ranks; expects <ierr> in scope (like CHKERRQ-style macros in this file) */
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar: the MUMPS scalar type matching the PETSc scalar configuration */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t.
indices match MUMPS documentation */
/* MUMPS control/info arrays are documented 1-based; these macros let the C code use 1-based indices */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data of a MUMPS factor matrix */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  /* The MUMPS instance struct; its precision/scalar flavor matches the PETSc configuration */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;          /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;    /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                /* symmetry flag handed to MUMPS (id.sym) -- NOTE(review): presumably 0/1/2 per the MUMPS SYM parameter; confirm at factor setup */
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;        /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP
controller such that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
   With 64-bit PetscInt the output aliases the (reusable, lazily grown) ia_alloc/ja_alloc buffers;
   otherwise the input arrays are returned directly (PetscInt and PetscMUMPSInt are both 32-bit).
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the Schur-complement related arrays held inside the MUMPS struct and reset its Schur state */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld = 0;
  mumps->id.ICNTL(19) = 0; /* no Schur complement on the next factorization */
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps id.redrhs (no copy); it serves as the rhs and, in the FACTORED case below, X also
     wraps id.redrhs so the solution lands back in place as promised by the function contract */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S^{-1} is available: the solve is a dense multiply into a scratch buffer (grown on demand) */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr =
MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the solution from the scratch buffer back into id.redrhs (wrapped by B) */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Drive the two halves of a MUMPS Schur-reduced solve: with expansion=PETSC_FALSE prepare the
   condensation step (ICNTL(26)=1, allocate redrhs); with expansion=PETSC_TRUE solve the Schur
   system ourselves and run the MUMPS expansion step (ICNTL(26)=2). No-op if ICNTL(19) is unset. */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

 */

/* SeqAIJ -> MUMPS assembled (unsymmetric) triples; values alias the AIJ array, no copy is made */
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  mumps->val = (PetscScalar*)av; /* MUMPS reads the AIJ values in place; nothing to do on reuse */
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SeqSELL -> MUMPS triples; row indices are recovered from the slice layout (8 rows per slice) */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<a->totalslices; i++) {
      /* entries within a slice cycle through its 8 rows; (r+1)&0x07 tracks the row offset */
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqBAIJ -> MUMPS triples: each bs x bs block is expanded into scalar entries */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt
*ai,*aj,*ajj,bs2 = aa->bs2; 418 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 419 PetscInt bs; 420 PetscErrorCode ierr; 421 PetscMUMPSInt *row,*col; 422 423 PetscFunctionBegin; 424 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 425 M = A->rmap->N/bs; 426 mumps->val = aa->a; 427 if (reuse == MAT_INITIAL_MATRIX) { 428 ai = aa->i; aj = aa->j; 429 nz = bs2*aa->nz; 430 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 431 for (i=0; i<M; i++) { 432 ajj = aj + ai[i]; 433 rnz = ai[i+1] - ai[i]; 434 for (k=0; k<rnz; k++) { 435 for (j=0; j<bs; j++) { 436 for (m=0; m<bs; m++) { 437 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 438 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 439 idx++; 440 } 441 } 442 } 443 } 444 mumps->irn = row; 445 mumps->jcn = col; 446 mumps->nnz = nz; 447 } 448 PetscFunctionReturn(0); 449 } 450 451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 452 { 453 const PetscInt *ai, *aj,*ajj; 454 PetscInt bs; 455 PetscInt64 nz,rnz,i,j,k,m; 456 PetscErrorCode ierr; 457 PetscMUMPSInt *row,*col; 458 PetscScalar *val; 459 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 460 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 461 #if defined(PETSC_USE_COMPLEX) 462 PetscBool hermitian; 463 #endif 464 465 PetscFunctionBegin; 466 #if defined(PETSC_USE_COMPLEX) 467 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 468 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 469 #endif 470 ai = aa->i; 471 aj = aa->j; 472 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 473 if (reuse == MAT_INITIAL_MATRIX) { 474 nz = aa->nz; 475 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 476 if (bs>1) { 477 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 478 mumps->val = mumps->val_alloc; 479 } else { 480 mumps->val = aa->a; 481 } 482 mumps->irn = row; 483 mumps->jcn = col; 484 } else { 485 if 
(bs == 1) mumps->val = aa->a; 486 row = mumps->irn; 487 col = mumps->jcn; 488 } 489 val = mumps->val; 490 491 nz = 0; 492 if (bs>1) { 493 for (i=0; i<mbs; i++) { 494 rnz = ai[i+1] - ai[i]; 495 ajj = aj + ai[i]; 496 for (j=0; j<rnz; j++) { 497 for (k=0; k<bs; k++) { 498 for (m=0; m<bs; m++) { 499 if (ajj[j]>i || k>=m) { 500 if (reuse == MAT_INITIAL_MATRIX) { 501 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 502 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 503 } 504 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 505 } 506 } 507 } 508 } 509 } 510 } else if (reuse == MAT_INITIAL_MATRIX) { 511 for (i=0; i<mbs; i++) { 512 rnz = ai[i+1] - ai[i]; 513 ajj = aj + ai[i]; 514 for (j=0; j<rnz; j++) { 515 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 516 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 517 nz++; 518 } 519 } 520 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 521 } 522 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 527 { 528 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 529 PetscInt64 nz,rnz,i,j; 530 const PetscScalar *av,*v1; 531 PetscScalar *val; 532 PetscErrorCode ierr; 533 PetscMUMPSInt *row,*col; 534 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 535 PetscBool missing; 536 #if defined(PETSC_USE_COMPLEX) 537 PetscBool hermitian; 538 #endif 539 540 PetscFunctionBegin; 541 #if defined(PETSC_USE_COMPLEX) 542 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 543 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 544 #endif 545 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 546 ai = aa->i; aj = aa->j; 547 adiag = aa->diag; 548 ierr = 
MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        /* adiag[i] >= ai[i+1] marks a row whose diagonal entry is structurally absent:
           count its upper-triangular entries (aj[j] >= i) one by one */
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      /* every row has a diagonal: the upper triangle of row i is exactly [adiag[i], ai[i+1]) */
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    /* second pass: fill (row,col,val) for the upper triangle */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1 = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1 = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    /* MAT_REUSE_MATRIX: the sparsity pattern is unchanged, only refresh the values */
    nz = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1 = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for
(i=0; i <M; i++) { 627 rnz = ai[i+1] - adiag[i]; 628 v1 = av + adiag[i]; 629 for (j=0; j<rnz; j++) { 630 val[nz++] = v1[j]; 631 } 632 } 633 } 634 } 635 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 640 { 641 PetscErrorCode ierr; 642 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 643 PetscInt bs; 644 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 645 PetscMUMPSInt *row,*col; 646 const PetscScalar *av,*bv,*v1,*v2; 647 PetscScalar *val; 648 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 649 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 650 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 651 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 652 #if defined(PETSC_USE_COMPLEX) 653 PetscBool hermitian; 654 #endif 655 656 PetscFunctionBegin; 657 #if defined(PETSC_USE_COMPLEX) 658 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 659 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 660 #endif 661 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 662 rstart = A->rmap->rstart; 663 ai = aa->i; 664 aj = aa->j; 665 bi = bb->i; 666 bj = bb->j; 667 av = aa->a; 668 bv = bb->a; 669 670 garray = mat->garray; 671 672 if (reuse == MAT_INITIAL_MATRIX) { 673 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 674 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 675 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 676 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 677 mumps->irn = row; 678 mumps->jcn = col; 679 mumps->val = mumps->val_alloc = val; 680 } else { 681 val = mumps->val; 682 } 683 684 jj = 0; irow = rstart; 685 for (i=0; i<mbs; i++) { 686 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 687 countA = ai[i+1] - ai[i]; 688 countB = bi[i+1] - bi[i]; 689 bjj = bj + bi[i]; 690 v1 = av + ai[i]*bs2; 
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            /* keep only upper-triangular scalar entries: whole block above the diagonal,
               or on-diagonal block with k>=m */
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  mumps->nnz = jj; /* exact count, now that the upper-triangular filtering is done */
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MUMPS unsymmetric triples in global numbering: local diagonal (A) block columns are
   offset by rstart; off-diagonal (B) block columns are translated through garray. Values copied. */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr =
MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: translate local off-diagonal columns to global through garray */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPIBAIJ -> MUMPS unsymmetric triples: every bs x bs block of both the diagonal (A) and
   off-diagonal (B) parts is expanded into scalar entries with global indices; values copied */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray =
mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2*(aa->nz + bb->nz); /* exact: every scalar entry of every block is emitted */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part: expand each diagonal-part block column-by-column (column-major block storage) */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: same expansion, with block columns mapped to global numbering via garray */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MUMPS symmetric triples: keep the global upper triangle, i.e. each row's diagonal
   part from adiag onward plus the off-diagonal entries whose global column exceeds rstart */
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const
PetscScalar *av, *bv,*v1,*v2; 894 PetscScalar *val; 895 Mat Ad,Ao; 896 Mat_SeqAIJ *aa; 897 Mat_SeqAIJ *bb; 898 #if defined(PETSC_USE_COMPLEX) 899 PetscBool hermitian; 900 #endif 901 902 PetscFunctionBegin; 903 #if defined(PETSC_USE_COMPLEX) 904 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 905 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 906 #endif 907 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 908 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 909 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 910 911 aa = (Mat_SeqAIJ*)(Ad)->data; 912 bb = (Mat_SeqAIJ*)(Ao)->data; 913 ai = aa->i; 914 aj = aa->j; 915 adiag = aa->diag; 916 bi = bb->i; 917 bj = bb->j; 918 919 rstart = A->rmap->rstart; 920 921 if (reuse == MAT_INITIAL_MATRIX) { 922 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 923 nzb = 0; /* num of upper triangular entries in mat->B */ 924 for (i=0; i<m; i++) { 925 nza += (ai[i+1] - adiag[i]); 926 countB = bi[i+1] - bi[i]; 927 bjj = bj + bi[i]; 928 for (j=0; j<countB; j++) { 929 if (garray[bjj[j]] > rstart) nzb++; 930 } 931 } 932 933 nz = nza + nzb; /* total nz of upper triangular part of mat */ 934 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 935 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 936 mumps->nnz = nz; 937 mumps->irn = row; 938 mumps->jcn = col; 939 mumps->val = mumps->val_alloc = val; 940 } else { 941 val = mumps->val; 942 } 943 944 jj = 0; irow = rstart; 945 for (i=0; i<m; i++) { 946 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 947 v1 = av + adiag[i]; 948 countA = ai[i+1] - adiag[i]; 949 countB = bi[i+1] - bi[i]; 950 bjj = bj + bi[i]; 951 v2 = bv + bi[i]; 952 953 /* A-part */ 954 for (j=0; j<countA; j++) { 955 if (reuse == MAT_INITIAL_MATRIX) { 956 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 957 ierr = PetscMUMPSIntCast(rstart + 
ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 958 } 959 val[jj++] = v1[j]; 960 } 961 962 /* B-part */ 963 for (j=0; j < countB; j++) { 964 if (garray[bjj[j]] > rstart) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 967 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 968 } 969 val[jj++] = v2[j]; 970 } 971 } 972 irow++; 973 } 974 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 975 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 PetscErrorCode MatDestroy_MUMPS(Mat A) 980 { 981 PetscErrorCode ierr; 982 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 983 984 PetscFunctionBegin; 985 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 986 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 987 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 988 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 989 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 990 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 991 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 993 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 994 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 995 mumps->id.job = JOB_END; 996 PetscMUMPS_c(mumps); 997 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 998 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 999 if (mumps->use_petsc_omp_support) { 1000 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1001 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1002 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1003 } 1004 #endif 1005 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1006 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1007 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1008 
ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1009 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1010 if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRMPI(ierr);} 1011 ierr = PetscFree(A->data);CHKERRQ(ierr); 1012 1013 /* clear composed functions */ 1014 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1015 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1016 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1017 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1018 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode    ierr;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OpenMP aggregation: this rank's rows go straight to MUMPS */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* with PETSc OpenMP support, gather the RHS pieces of all ranks in omp_comm onto the omp master */
    const PetscInt *ranges;
    PetscMPIInt    j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group      petsc_group,omp_group;
    PetscScalar    *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        /* map omp_comm ranks to their ranks in petsc_comm so we can look up their row ownership */
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs; /* column stride of the interleaved rhs_loc[] layout */
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}

/* Solve A x = b with the computed MUMPS factor (JOB_SOLVE); handles distributed vs centralized
   RHS, the optional Schur-complement condensation/expansion steps, and the scatter of the
   MUMPS-distributed solution back into the PETSc layout of x. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscErrorCode    ierr;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* factorization failed earlier: report and return x filled with Inf instead of solving */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) {
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* MUMPS solves in place, so seed x with b and hand its array to MUMPS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
    } else if (!mumps->myid) {
      ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
    }
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solve A^T x = b: per the MUMPS manual ICNTL(9) selects the system to solve (1 => A, else A^T),
   so temporarily flip it around a regular MatSolve_MUMPS() call. */
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

/* Solve A X = B for multiple right-hand sides. X must be dense; B is either dense or a
   MATTRANSPOSEMAT wrapper of a sparse matrix (MUMPS wants sparse RHS in compressed-column form).
   Sequential runs solve in place in X; parallel runs centralize/distribute B as required and
   scatter the MUMPS-distributed solution back into X. */
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;   /* intentionally shadows the outer declarations; sequential path only */
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
        ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
      } else {
        /* non-host ranks contribute nothing to the centralized RHS */
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
      }
      ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = ISDestroy(&is_to);CHKERRQ(ierr);
      ierr = ISDestroy(&is_from);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      if (!mumps->myid) { /* define rhs on the host */
        ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)bray;
        ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      /* only the host passes the sparse RHS (diagonal block of Bt) to MUMPS */
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find the rank that owns global row isol_loc[i] of X */
    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;  /* restore the buffers MatSolve() uses */
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) { /* b_seq/scat_rhs were created only in the centralized dense-RHS path */
      ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solve A X = Bt^T for a sparse Bt: wrap Bt in a MATTRANSPOSEMAT (no copy) so MatMatSolve_MUMPS()
   can hand its compressed rows to MUMPS as the compressed columns of the actual RHS. */
PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:        numeric factor
  output:
   nneg:     total number of negative pivots
   nzero:    total number of zero pivots
   npos:     (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  /* NOTE(review): the message below says ICNTL(13) but prints mumps->id.INFOG(13) -- looks like the
     argument should be mumps->id.ICNTL(13) to match the text; confirm against upstream PETSc */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13));

  if (nneg) *nneg = mumps->id.INFOG(12);  /* number of negative pivots, per MUMPS INFOG(12) */
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

/* When PETSc OpenMP support is used, gather the COO triples (irn/jcn/val) of all ranks in
   omp_comm onto the omp master rank, slicing messages so each count fits in a PetscMPIInt.
   On MAT_REUSE_MATRIX only val is re-gathered since the pattern is unchanged. No-op when
   omp_comm has a single rank. */
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
      ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr);

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
        ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);

        /* Self communication */
        ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
        ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (i=1; i<osize; i++) {
          /* slice each peer's contribution into <= PETSC_MPI_INT_MAX chunks */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            irn += count;
            jcn += count;
            val += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          irn += count;
          jcn += count;
          val += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: the pattern (irn/jcn) is already on the master; only values move */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            val += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          val += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  /* NOTE(review): 'INFOG(1) == -16 && INFOG(1) == 0' compares the same field against two different
     values and is therefore always false, making the '!(...)' always true; presumably the second
     operand was meant to be INFO(2) == 0 -- confirm against upstream PETSc before changing */
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
    if (mumps->id.INFOG(1) == -6) {
1664 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1665 } 1666 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1671 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1672 1673 /* numerical factorization phase */ 1674 /*-------------------------------*/ 1675 mumps->id.job = JOB_FACTNUMERIC; 1676 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1677 if (!mumps->myid) { 1678 mumps->id.a = (MumpsScalar*)mumps->val; 1679 } 1680 } else { 1681 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1682 } 1683 PetscMUMPS_c(mumps); 1684 if (mumps->id.INFOG(1) < 0) { 1685 if (A->erroriffailure) { 1686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1687 } else { 1688 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1689 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1690 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1691 } else if (mumps->id.INFOG(1) == -13) { 1692 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1693 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1694 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1695 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1696 
F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1697 } else { 1698 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1699 F->factorerrortype = MAT_FACTOR_OTHER; 1700 } 1701 } 1702 } 1703 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1704 1705 F->assembled = PETSC_TRUE; 1706 mumps->matstruc = SAME_NONZERO_PATTERN; 1707 if (F->schur) { /* reset Schur status to unfactored */ 1708 #if defined(PETSC_HAVE_CUDA) 1709 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1710 #endif 1711 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1712 mumps->id.ICNTL(19) = 2; 1713 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1714 } 1715 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1716 } 1717 1718 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1719 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1720 1721 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1722 if (mumps->petsc_size > 1) { 1723 PetscInt lsol_loc; 1724 PetscScalar *sol_loc; 1725 1726 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1727 1728 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1729 if (mumps->x_seq) { 1730 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1731 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1732 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1733 } 1734 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1735 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1736 mumps->id.lsol_loc = lsol_loc; 1737 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1738 ierr = 
VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1739 } 1740 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 /* Sets MUMPS options from the options database */ 1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1746 { 1747 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1748 PetscErrorCode ierr; 1749 PetscMUMPSInt icntl=0; 1750 PetscInt info[80],i,ninfo=80; 1751 PetscBool flg=PETSC_FALSE; 1752 1753 PetscFunctionBegin; 1754 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1755 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1756 if (flg) mumps->id.ICNTL(1) = icntl; 1757 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1758 if (flg) mumps->id.ICNTL(2) = icntl; 1759 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1760 if (flg) mumps->id.ICNTL(3) = icntl; 1761 1762 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1763 if (flg) mumps->id.ICNTL(4) = icntl; 1764 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1765 1766 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1767 if (flg) mumps->id.ICNTL(6) = icntl; 1768 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) { 1771 if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto\n"); 1772 mumps->id.ICNTL(7) = icntl; 1773 } 1774 1775 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1776 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1777 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1778 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1783 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1784 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1785 ierr = 
MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1786 } 1787 1788 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1789 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1790 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1791 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1792 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1793 In short, we could not use distributed RHS with MPICH until v4.0b1. 1794 */ 1795 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1796 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1797 #else 1798 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1799 #endif 1800 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr); 1801 if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10\n",(int)mumps->ICNTL20); 1802 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1803 if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n"); 1804 #endif 1805 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1806 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1808 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1809 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1810 if (mumps->id.ICNTL(24)) { 1811 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1812 } 1813 1814 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) 
ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1819 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1821 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1824 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1826 1827 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1829 ierr = 
PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1830 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1831 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1832 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1833 1834 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1835 1836 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1837 if (ninfo) { 1838 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1839 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1840 mumps->ninfo = ninfo; 1841 for (i=0; i<ninfo; i++) { 1842 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1843 else mumps->info[i] = info[i]; 1844 } 1845 } 1846 1847 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1852 { 1853 PetscErrorCode ierr; 1854 PetscInt nthreads=0; 1855 MPI_Comm newcomm=MPI_COMM_NULL; 1856 1857 PetscFunctionBegin; 1858 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1859 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr); 1860 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */ 1861 1862 ierr = 
PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1863 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1864 ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1865 if (mumps->use_petsc_omp_support) { 1866 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1867 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1868 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1869 #else 1870 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n",((PetscObject)A)->prefix?((PetscObject)A)->prefix:""); 1871 #endif 1872 } else { 1873 mumps->omp_comm = PETSC_COMM_SELF; 1874 mumps->mumps_comm = mumps->petsc_comm; 1875 mumps->is_omp_master = PETSC_TRUE; 1876 } 1877 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr); 1878 mumps->reqs = NULL; 1879 mumps->tag = 0; 1880 1881 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1882 if (mumps->mumps_comm != MPI_COMM_NULL) { 1883 ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRMPI(ierr); 1884 mumps->mumps_comm = newcomm; 1885 } 1886 1887 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1888 mumps->id.job = JOB_INIT; 1889 mumps->id.par = 1; /* host participates factorizaton and solve */ 1890 mumps->id.sym = mumps->sym; 1891 1892 PetscMUMPS_c(mumps); 1893 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1894 1895 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1896 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1897 */ 1898 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRMPI(ierr); 1899 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr); 1900 1901 mumps->scat_rhs = NULL; 1902 mumps->scat_sol = NULL; 1903 1904 /* set PETSc-MUMPS default options - override MUMPS default */ 1905 mumps->id.ICNTL(3) = 0; 1906 mumps->id.ICNTL(4) = 0; 1907 if (mumps->petsc_size == 1) { 1908 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1909 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1910 } else { 1911 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1912 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1913 } 1914 1915 /* schur */ 1916 mumps->id.size_schur = 0; 1917 mumps->id.listvar_schur = NULL; 1918 mumps->id.schur = NULL; 1919 mumps->sizeredrhs = 0; 1920 mumps->schur_sol = NULL; 1921 mumps->schur_sizesol = 0; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1926 { 1927 PetscErrorCode ierr; 1928 1929 PetscFunctionBegin; 1930 if (mumps->id.INFOG(1) < 0) { 1931 if (A->erroriffailure) 
{ 1932 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1933 } else { 1934 if (mumps->id.INFOG(1) == -6) { 1935 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1936 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1937 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1938 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1939 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1940 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1941 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1942 } else { 1943 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1944 F->factorerrortype = MAT_FACTOR_OTHER; 1945 } 1946 } 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1952 { 1953 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1954 PetscErrorCode ierr; 1955 Vec b; 1956 const PetscInt M = A->rmap->N; 1957 1958 PetscFunctionBegin; 1959 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1960 1961 /* Set MUMPS options from the options database */ 1962 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1963 1964 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1965 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1966 1967 /* analysis phase */ 1968 /*----------------*/ 1969 mumps->id.job = JOB_FACTSYMBOLIC; 1970 mumps->id.n = M; 1971 switch (mumps->id.ICNTL(18)) { 1972 case 0: /* centralized assembled matrix input */ 1973 if (!mumps->myid) { 1974 mumps->id.nnz = mumps->nnz; 1975 mumps->id.irn = mumps->irn; 1976 mumps->id.jcn = mumps->jcn; 1977 if (mumps->id.ICNTL(6)>1) mumps->id.a = 
(MumpsScalar*)mumps->val; 1978 if (r) { 1979 mumps->id.ICNTL(7) = 1; 1980 if (!mumps->myid) { 1981 const PetscInt *idx; 1982 PetscInt i; 1983 1984 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1985 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1986 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1987 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1988 } 1989 } 1990 } 1991 break; 1992 case 3: /* distributed assembled matrix input (size>1) */ 1993 mumps->id.nnz_loc = mumps->nnz; 1994 mumps->id.irn_loc = mumps->irn; 1995 mumps->id.jcn_loc = mumps->jcn; 1996 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1997 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1998 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1999 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2000 ierr = VecDestroy(&b);CHKERRQ(ierr); 2001 } 2002 break; 2003 } 2004 PetscMUMPS_c(mumps); 2005 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2006 2007 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2008 F->ops->solve = MatSolve_MUMPS; 2009 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2010 F->ops->matsolve = MatMatSolve_MUMPS; 2011 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 /* Note the Petsc r and c permutations are ignored */ 2016 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2017 { 2018 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2019 PetscErrorCode ierr; 2020 Vec b; 2021 const PetscInt M = A->rmap->N; 2022 2023 PetscFunctionBegin; 2024 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2025 2026 /* Set MUMPS options from the options database */ 2027 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2028 2029 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, 
mumps);CHKERRQ(ierr); 2030 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2031 2032 /* analysis phase */ 2033 /*----------------*/ 2034 mumps->id.job = JOB_FACTSYMBOLIC; 2035 mumps->id.n = M; 2036 switch (mumps->id.ICNTL(18)) { 2037 case 0: /* centralized assembled matrix input */ 2038 if (!mumps->myid) { 2039 mumps->id.nnz = mumps->nnz; 2040 mumps->id.irn = mumps->irn; 2041 mumps->id.jcn = mumps->jcn; 2042 if (mumps->id.ICNTL(6)>1) { 2043 mumps->id.a = (MumpsScalar*)mumps->val; 2044 } 2045 } 2046 break; 2047 case 3: /* distributed assembled matrix input (size>1) */ 2048 mumps->id.nnz_loc = mumps->nnz; 2049 mumps->id.irn_loc = mumps->irn; 2050 mumps->id.jcn_loc = mumps->jcn; 2051 if (mumps->id.ICNTL(6)>1) { 2052 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2053 } 2054 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2055 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2056 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2057 ierr = VecDestroy(&b);CHKERRQ(ierr); 2058 } 2059 break; 2060 } 2061 PetscMUMPS_c(mumps); 2062 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2063 2064 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2065 F->ops->solve = MatSolve_MUMPS; 2066 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2067 PetscFunctionReturn(0); 2068 } 2069 2070 /* Note the Petsc r permutation and factor info are ignored */ 2071 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2072 { 2073 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2074 PetscErrorCode ierr; 2075 Vec b; 2076 const PetscInt M = A->rmap->N; 2077 2078 PetscFunctionBegin; 2079 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2080 2081 /* Set MUMPS options from the options database */ 2082 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2083 2084 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 
2085 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2086 2087 /* analysis phase */ 2088 /*----------------*/ 2089 mumps->id.job = JOB_FACTSYMBOLIC; 2090 mumps->id.n = M; 2091 switch (mumps->id.ICNTL(18)) { 2092 case 0: /* centralized assembled matrix input */ 2093 if (!mumps->myid) { 2094 mumps->id.nnz = mumps->nnz; 2095 mumps->id.irn = mumps->irn; 2096 mumps->id.jcn = mumps->jcn; 2097 if (mumps->id.ICNTL(6)>1) { 2098 mumps->id.a = (MumpsScalar*)mumps->val; 2099 } 2100 } 2101 break; 2102 case 3: /* distributed assembled matrix input (size>1) */ 2103 mumps->id.nnz_loc = mumps->nnz; 2104 mumps->id.irn_loc = mumps->irn; 2105 mumps->id.jcn_loc = mumps->jcn; 2106 if (mumps->id.ICNTL(6)>1) { 2107 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2108 } 2109 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2110 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2111 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2112 ierr = VecDestroy(&b);CHKERRQ(ierr); 2113 } 2114 break; 2115 } 2116 PetscMUMPS_c(mumps); 2117 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2118 2119 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2120 F->ops->solve = MatSolve_MUMPS; 2121 F->ops->solvetranspose = MatSolve_MUMPS; 2122 F->ops->matsolve = MatMatSolve_MUMPS; 2123 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2124 #if defined(PETSC_USE_COMPLEX) 2125 F->ops->getinertia = NULL; 2126 #else 2127 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2128 #endif 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2133 { 2134 PetscErrorCode ierr; 2135 PetscBool iascii; 2136 PetscViewerFormat format; 2137 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2138 2139 PetscFunctionBegin; 2140 /* check if matrix is mumps type */ 2141 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2142 2143 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2144 if (iascii) { 2145 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2146 if (format == PETSC_VIEWER_ASCII_INFO) { 2147 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym);CHKERRQ(ierr); 2149 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par);CHKERRQ(ierr); 2150 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2151 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2155 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2158 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2159 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2160 if (mumps->id.ICNTL(11)>0) { 2161 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2166 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2167 } 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2169 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2171 /* ICNTL(15-17) not used */ 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2175 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2176 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2177 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2178 2179 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2181 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) 
(Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2182 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2183 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2184 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2185 2186 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2189 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2190 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2191 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2192 2193 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2194 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2195 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2196 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2197 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2198 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g 
\n",mumps->id.CNTL(7));CHKERRQ(ierr); 2199 2200 /* information local to each processor */ 2201 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2202 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2203 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2204 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2205 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2206 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2207 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2208 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2209 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2210 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2211 2212 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2213 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2214 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2215 2216 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2217 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2218 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2219 2220 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2221 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2222 ierr = 
PetscViewerFlush(viewer);CHKERRQ(ierr); 2223 2224 if (mumps->ninfo && mumps->ninfo <= 80) { 2225 PetscInt i; 2226 for (i=0; i<mumps->ninfo; i++) { 2227 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2228 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2229 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2230 } 2231 } 2232 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2233 2234 if (!mumps->myid) { /* information from the host */ 2235 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2236 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2237 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2238 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2239 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr); 
2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all 
processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr); 2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr); 2260 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr); 2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2267 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2268 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): 
%d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2270 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr); 2271 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr); 2272 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr); 2273 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr); 2274 } 2275 } 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 2280 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2281 { 2282 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2283 2284 PetscFunctionBegin; 2285 info->block_size = 1.0; 2286 info->nz_allocated = mumps->id.INFOG(20); 2287 info->nz_used = mumps->id.INFOG(20); 2288 info->nz_unneeded = 0.0; 2289 info->assemblies = 0.0; 2290 info->mallocs = 0.0; 2291 info->memory = 0.0; 2292 info->fill_ratio_given = 0; 2293 info->fill_ratio_needed = 0; 2294 info->factor_mallocs = 0; 2295 PetscFunctionReturn(0); 2296 } 2297 2298 /* -------------------------------------------------------------------------------------------*/ 2299 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2300 { 2301 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2302 const PetscScalar *arr; 2303 const PetscInt *idxs; 2304 PetscInt 
size,i; 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2309 if (mumps->petsc_size > 1) { 2310 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2311 2312 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2313 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr); 2314 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 2315 } 2316 2317 /* Schur complement matrix */ 2318 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2319 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2320 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2321 mumps->id.schur = (MumpsScalar*)arr; 2322 mumps->id.size_schur = size; 2323 mumps->id.schur_lld = size; 2324 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2325 if (mumps->sym == 1) { 2326 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2327 } 2328 2329 /* MUMPS expects Fortran style indices */ 2330 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2331 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2332 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2333 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2334 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2335 if (mumps->petsc_size > 1) { 2336 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2337 } else { 2338 if (F->factortype == MAT_FACTOR_LU) { 2339 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2340 } else { 2341 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2342 } 2343 } 2344 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2345 mumps->id.ICNTL(26) = -1; 2346 
PetscFunctionReturn(0); 2347 } 2348 2349 /* -------------------------------------------------------------------------------------------*/ 2350 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2351 { 2352 Mat St; 2353 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2354 PetscScalar *array; 2355 #if defined(PETSC_USE_COMPLEX) 2356 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2357 #endif 2358 PetscErrorCode ierr; 2359 2360 PetscFunctionBegin; 2361 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2362 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2363 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2364 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2365 ierr = MatSetUp(St);CHKERRQ(ierr); 2366 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2367 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2368 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2369 PetscInt i,j,N=mumps->id.size_schur; 2370 for (i=0;i<N;i++) { 2371 for (j=0;j<N;j++) { 2372 #if !defined(PETSC_USE_COMPLEX) 2373 PetscScalar val = mumps->id.schur[i*N+j]; 2374 #else 2375 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2376 #endif 2377 array[j*N+i] = val; 2378 } 2379 } 2380 } else { /* stored by columns */ 2381 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2382 } 2383 } else { /* either full or lower-triangular (not packed) */ 2384 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2385 PetscInt i,j,N=mumps->id.size_schur; 2386 for (i=0;i<N;i++) { 2387 for (j=i;j<N;j++) { 2388 #if !defined(PETSC_USE_COMPLEX) 2389 PetscScalar val = mumps->id.schur[i*N+j]; 2390 #else 2391 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2392 #endif 2393 array[i*N+j] = val; 2394 
array[j*N+i] = val; 2395 } 2396 } 2397 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2398 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2399 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2400 PetscInt i,j,N=mumps->id.size_schur; 2401 for (i=0;i<N;i++) { 2402 for (j=0;j<i+1;j++) { 2403 #if !defined(PETSC_USE_COMPLEX) 2404 PetscScalar val = mumps->id.schur[i*N+j]; 2405 #else 2406 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2407 #endif 2408 array[i*N+j] = val; 2409 array[j*N+i] = val; 2410 } 2411 } 2412 } 2413 } 2414 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2415 *S = St; 2416 PetscFunctionReturn(0); 2417 } 2418 2419 /* -------------------------------------------------------------------------------------------*/ 2420 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2421 { 2422 PetscErrorCode ierr; 2423 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2424 2425 PetscFunctionBegin; 2426 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2431 { 2432 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2433 2434 PetscFunctionBegin; 2435 *ival = mumps->id.ICNTL(icntl); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 /*@ 2440 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2441 2442 Logically Collective on Mat 2443 2444 Input Parameters: 2445 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2446 . icntl - index of MUMPS parameter array ICNTL() 2447 - ival - value of MUMPS ICNTL(icntl) 2448 2449 Options Database: 2450 . -mat_mumps_icntl_<icntl> <ival> 2451 2452 Level: beginner 2453 2454 References: 2455 . 
MUMPS Users' Guide 2456 2457 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2458 @*/ 2459 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2460 { 2461 PetscErrorCode ierr; 2462 2463 PetscFunctionBegin; 2464 PetscValidType(F,1); 2465 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2466 PetscValidLogicalCollectiveInt(F,icntl,2); 2467 PetscValidLogicalCollectiveInt(F,ival,3); 2468 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 /*@ 2473 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2474 2475 Logically Collective on Mat 2476 2477 Input Parameters: 2478 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2479 - icntl - index of MUMPS parameter array ICNTL() 2480 2481 Output Parameter: 2482 . ival - value of MUMPS ICNTL(icntl) 2483 2484 Level: beginner 2485 2486 References: 2487 . 
MUMPS Users' Guide 2488 2489 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2490 @*/ 2491 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2492 { 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 PetscValidType(F,1); 2497 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2498 PetscValidLogicalCollectiveInt(F,icntl,2); 2499 PetscValidIntPointer(ival,3); 2500 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /* -------------------------------------------------------------------------------------------*/ 2505 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2506 { 2507 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2508 2509 PetscFunctionBegin; 2510 mumps->id.CNTL(icntl) = val; 2511 PetscFunctionReturn(0); 2512 } 2513 2514 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2515 { 2516 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2517 2518 PetscFunctionBegin; 2519 *val = mumps->id.CNTL(icntl); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /*@ 2524 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2525 2526 Logically Collective on Mat 2527 2528 Input Parameters: 2529 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2530 . icntl - index of MUMPS parameter array CNTL() 2531 - val - value of MUMPS CNTL(icntl) 2532 2533 Options Database: 2534 . -mat_mumps_cntl_<icntl> <val> 2535 2536 Level: beginner 2537 2538 References: 2539 . 
MUMPS Users' Guide 2540 2541 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2542 @*/ 2543 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2544 { 2545 PetscErrorCode ierr; 2546 2547 PetscFunctionBegin; 2548 PetscValidType(F,1); 2549 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2550 PetscValidLogicalCollectiveInt(F,icntl,2); 2551 PetscValidLogicalCollectiveReal(F,val,3); 2552 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2553 PetscFunctionReturn(0); 2554 } 2555 2556 /*@ 2557 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2558 2559 Logically Collective on Mat 2560 2561 Input Parameters: 2562 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2563 - icntl - index of MUMPS parameter array CNTL() 2564 2565 Output Parameter: 2566 . val - value of MUMPS CNTL(icntl) 2567 2568 Level: beginner 2569 2570 References: 2571 . 
MUMPS Users' Guide 2572 2573 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2574 @*/ 2575 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2576 { 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 PetscValidType(F,1); 2581 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2582 PetscValidLogicalCollectiveInt(F,icntl,2); 2583 PetscValidRealPointer(val,3); 2584 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2589 { 2590 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2591 2592 PetscFunctionBegin; 2593 *info = mumps->id.INFO(icntl); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2598 { 2599 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2600 2601 PetscFunctionBegin; 2602 *infog = mumps->id.INFOG(icntl); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2607 { 2608 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2609 2610 PetscFunctionBegin; 2611 *rinfo = mumps->id.RINFO(icntl); 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2616 { 2617 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2618 2619 PetscFunctionBegin; 2620 *rinfog = mumps->id.RINFOG(icntl); 2621 PetscFunctionReturn(0); 2622 } 2623 2624 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2625 { 2626 PetscErrorCode ierr; 2627 Mat Bt = NULL,Btseq = NULL; 2628 PetscBool flg; 2629 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2630 PetscScalar *aa; 2631 PetscInt spnr,*ia,*ja,M,nrhs; 2632 2633 PetscFunctionBegin; 2634 PetscValidPointer(spRHS,2); 2635 ierr 
= PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2636 if (flg) { 2637 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2638 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2639 2640 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2641 2642 if (mumps->petsc_size > 1) { 2643 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2644 Btseq = b->A; 2645 } else { 2646 Btseq = Bt; 2647 } 2648 2649 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2650 mumps->id.nrhs = nrhs; 2651 mumps->id.lrhs = M; 2652 mumps->id.rhs = NULL; 2653 2654 if (!mumps->myid) { 2655 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2656 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2657 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2658 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2659 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2660 } else { 2661 mumps->id.irhs_ptr = NULL; 2662 mumps->id.irhs_sparse = NULL; 2663 mumps->id.nz_rhs = 0; 2664 mumps->id.rhs_sparse = NULL; 2665 } 2666 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2667 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2668 2669 /* solve phase */ 2670 /*-------------*/ 2671 mumps->id.job = JOB_SOLVE; 2672 PetscMUMPS_c(mumps); 2673 if (mumps->id.INFOG(1) < 0) 2674 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2675 2676 if (!mumps->myid) { 2677 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2678 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2679 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2680 } 2681 
PetscFunctionReturn(0); 2682 } 2683 2684 /*@ 2685 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2686 2687 Logically Collective on Mat 2688 2689 Input Parameters: 2690 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2691 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2692 2693 Output Parameter: 2694 . spRHS - requested entries of inverse of A 2695 2696 Level: beginner 2697 2698 References: 2699 . MUMPS Users' Guide 2700 2701 .seealso: MatGetFactor(), MatCreateTranspose() 2702 @*/ 2703 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2704 { 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 PetscValidType(F,1); 2709 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2710 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2711 PetscFunctionReturn(0); 2712 } 2713 2714 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2715 { 2716 PetscErrorCode ierr; 2717 Mat spRHS; 2718 2719 PetscFunctionBegin; 2720 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2721 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2722 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 /*@ 2727 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2728 2729 Logically Collective on Mat 2730 2731 Input Parameters: 2732 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2733 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2734 2735 Output Parameter: 2736 . spRHST - requested entries of inverse of A^T 2737 2738 Level: beginner 2739 2740 References: 2741 . 
MUMPS Users' Guide 2742 2743 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2744 @*/ 2745 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2746 { 2747 PetscErrorCode ierr; 2748 PetscBool flg; 2749 2750 PetscFunctionBegin; 2751 PetscValidType(F,1); 2752 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2753 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2754 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2755 2756 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 /*@ 2761 MatMumpsGetInfo - Get MUMPS parameter INFO() 2762 2763 Logically Collective on Mat 2764 2765 Input Parameters: 2766 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2767 - icntl - index of MUMPS parameter array INFO() 2768 2769 Output Parameter: 2770 . ival - value of MUMPS INFO(icntl) 2771 2772 Level: beginner 2773 2774 References: 2775 . 
MUMPS Users' Guide 2776 2777 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2778 @*/ 2779 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2780 { 2781 PetscErrorCode ierr; 2782 2783 PetscFunctionBegin; 2784 PetscValidType(F,1); 2785 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2786 PetscValidIntPointer(ival,3); 2787 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2788 PetscFunctionReturn(0); 2789 } 2790 2791 /*@ 2792 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2793 2794 Logically Collective on Mat 2795 2796 Input Parameters: 2797 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2798 - icntl - index of MUMPS parameter array INFOG() 2799 2800 Output Parameter: 2801 . ival - value of MUMPS INFOG(icntl) 2802 2803 Level: beginner 2804 2805 References: 2806 . 
MUMPS Users' Guide 2807 2808 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2809 @*/ 2810 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2811 { 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 PetscValidType(F,1); 2816 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2817 PetscValidIntPointer(ival,3); 2818 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2819 PetscFunctionReturn(0); 2820 } 2821 2822 /*@ 2823 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2824 2825 Logically Collective on Mat 2826 2827 Input Parameters: 2828 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2829 - icntl - index of MUMPS parameter array RINFO() 2830 2831 Output Parameter: 2832 . val - value of MUMPS RINFO(icntl) 2833 2834 Level: beginner 2835 2836 References: 2837 . 
MUMPS Users' Guide 2838 2839 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2840 @*/ 2841 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2842 { 2843 PetscErrorCode ierr; 2844 2845 PetscFunctionBegin; 2846 PetscValidType(F,1); 2847 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2848 PetscValidRealPointer(val,3); 2849 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2850 PetscFunctionReturn(0); 2851 } 2852 2853 /*@ 2854 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2855 2856 Logically Collective on Mat 2857 2858 Input Parameters: 2859 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2860 - icntl - index of MUMPS parameter array RINFOG() 2861 2862 Output Parameter: 2863 . val - value of MUMPS RINFOG(icntl) 2864 2865 Level: beginner 2866 2867 References: 2868 . MUMPS Users' Guide 2869 2870 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2871 @*/ 2872 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2873 { 2874 PetscErrorCode ierr; 2875 2876 PetscFunctionBegin; 2877 PetscValidType(F,1); 2878 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2879 PetscValidRealPointer(val,3); 2880 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2881 PetscFunctionReturn(0); 2882 } 2883 2884 /*MC 2885 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2886 distributed and sequential matrices via the external package MUMPS. 
2887 2888 Works with MATAIJ and MATSBAIJ matrices 2889 2890 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2891 2892 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2893 2894 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2895 2896 Options Database Keys: 2897 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2898 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2899 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2900 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2901 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2902 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2903 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2904 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2905 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2906 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2907 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2908 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2909 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2910 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2911 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2912 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2913 . 
-mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it will error if the matrix is Hermitian.

  When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
    Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.

   Using MUMPS with 64-bit integers

    MUMPS provides 64-bit integer support in two build modes:
      full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
      requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

      selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
      MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
      columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
      integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

    With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

   Two modes to run MUMPS/PETSc with OpenMP

$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
   by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue.
Still use the above example and suppose your compute node has two sockets, 2977 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2978 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2979 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2980 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2981 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2982 examine the mapping result. 2983 2984 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2985 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2986 calls omp_set_num_threads(m) internally before calling MUMPS. 2987 2988 References: 2989 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2990 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()

M*/

/* Reports this solver package's name; composed on every MUMPS factor matrix as "MatFactorGetSolverType_C" */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS Cholesky supports complex symmetric but not Hermitian matrices */
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MatMumps* accessors on the factor so MatMumpsGetIcntl() etc. can reach them via PetscUseMethod()/PetscTryMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* MUMPS takes the matrix as COO triples; pick the converter matching the input layout */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
    mumps->sym = 0;  /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* Cholesky only needs one triangle, so convert AIJ to (S)BAIJ-style upper-triangular triples */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;  /* MUMPS SYM=2: general symmetric (SPD LDLt is real-only) */
#else
    if (A->spd_set && A->spd) mumps->sym = 1;  /* MUMPS SYM=1: symmetric positive definite */
    else mumps->sym = 2;                       /* MUMPS SYM=2: general symmetric */
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS Cholesky supports complex symmetric but not Hermitian matrices (SBAIJ factor is always Cholesky) */
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* SBAIJ already stores one triangle; pick the triples converter matching the input layout */
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MatMumps* accessors on the factor (same set as the AIJ factor) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;  /* MUMPS SYM=2: general symmetric (SPD LDLt is real-only) */
#else
  if (A->spd_set && A->spd) mumps->sym = 1;  /* MUMPS SYM=1: symmetric positive definite */
  else mumps->sym = 2;                       /* MUMPS SYM=2: general symmetric */
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices; only LU is supported (for Cholesky use SBAIJ or AIJ) */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* BAIJ blocks are expanded into scalar AIJ-style triples for MUMPS */
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;  /* MUMPS SYM=0: unsymmetric */
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MatMumps* accessors on the factor (same set as the AIJ factor) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MatMumps* accessors on the factor; note the GetInverse/GetInverseTranspose
     methods composed for the other formats are not provided here */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* only sequential SELL has a triples converter so far */
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0;  /* MUMPS SYM=0: unsymmetric */
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* Registers the MUMPS factor routines for each supported matrix type; called once at package
   registration time. Note MATMPIBAIJ/MATSEQBAIJ are registered for Cholesky as well so that
   MatGetFactor_baij_mumps() can raise an informative error directing users to SBAIJ/AIJ. */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}