/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_FACTSYMBOLIC  1
#define JOB_FACTNUMERIC   2
#define JOB_SOLVE         3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro one uses to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
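/* Illustrative use of the two helpers above (an expository sketch; the option name is made up). As the
   hard-wired PetscOptionsObject argument in the macro suggests, PetscOptionsMUMPSInt() is meant to be used
   inside a PetscOptionsBegin()/PetscOptionsEnd() block:

     PetscMUMPSInt icntl;
     PetscBool     flg;
     ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_example","Example ICNTL entry","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
     if (flg) mumps->id.ICNTL(4) = icntl;

   The value is read as a PetscInt and stored through PetscMUMPSIntCast(), so with 64-bit PetscInt an
   out-of-range value is caught (in debug builds) instead of being silently truncated. */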
/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
         to each process, so we only Bcast info[1], an error code, and leave the others alone (they do not have \
         an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros so that indices match the MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
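/* For example, with the macros above mumps->id.ICNTL(24) = 1 is the same as mumps->id.icntl[23] = 1,
   so the code below can be read against the 1-based parameter numbering used in the MUMPS users' guide. */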
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;           /* the (i,j,v) triplets passed to MUMPS */
  PetscScalar    *val,*val_alloc;     /* for some matrices we can access their data array directly; for others we need a buffer, hence val_alloc */
  PetscInt64     nnz;                 /* number of nonzeros; the type is called selective 64-bit in MUMPS */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;          /* check if ICNTL(9) is changed from the previous MatSolve */
  VecScatter     scat_rhs,scat_sol;   /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;             /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;         /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR structure of a sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by the PETSc/MUMPS OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP controller through which blocked processes release their CPUs (MPI_Barrier does not give this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's communicator */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* MUMPS uses 1-based indices.
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 195 196 PetscFunctionBegin; 197 #if defined(PETSC_USE_64BIT_INDICES) 198 { 199 PetscInt i; 200 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 201 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 202 ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr); 203 mumps->cur_ilen = nrow+1; 204 } 205 if (nnz > mumps->cur_jlen) { 206 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 207 ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr); 208 mumps->cur_jlen = nnz; 209 } 210 for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);} 211 for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);} 212 *ia_mumps = mumps->ia_alloc; 213 *ja_mumps = mumps->ja_alloc; 214 } 215 #else 216 *ia_mumps = ia; 217 *ja_mumps = ja; 218 #endif 219 ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 224 { 225 PetscErrorCode ierr; 226 227 PetscFunctionBegin; 228 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 229 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 230 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 231 mumps->id.size_schur = 0; 232 mumps->id.schur_lld = 0; 233 mumps->id.ICNTL(19) = 0; 234 PetscFunctionReturn(0); 235 } 236 237 /* solve with rhs in mumps->id.redrhs and return in the same location */ 238 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 239 { 240 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 241 Mat S,B,X; 242 MatFactorSchurStatus schurstatus; 243 PetscInt sizesol; 244 PetscErrorCode ierr; 245 246 PetscFunctionBegin; 247 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 248 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 250 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 252 ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr); 253 #endif 254 switch (schurstatus) { 255 case MAT_FACTOR_SCHUR_FACTORED: 256 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 257 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 258 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 259 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 260 #endif 261 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 262 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 263 } else { 264 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 265 } 266 break; 267 case MAT_FACTOR_SCHUR_INVERTED: 268 sizesol = mumps->id.nrhs*mumps->id.size_schur; 269 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 270 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 271 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 272 mumps->schur_sizesol = sizesol; 273 } 274 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 275 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 276 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 277 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 278 #endif 279 ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr); 280 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 281 ierr = 
MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr); 282 } else { 283 ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr); 284 } 285 ierr = MatProductSetFromOptions(X);CHKERRQ(ierr); 286 ierr = MatProductSymbolic(X);CHKERRQ(ierr); 287 ierr = MatProductNumeric(X);CHKERRQ(ierr); 288 289 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 290 break; 291 default: 292 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 293 } 294 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 295 ierr = MatDestroy(&B);CHKERRQ(ierr); 296 ierr = MatDestroy(&X);CHKERRQ(ierr); 297 PetscFunctionReturn(0); 298 } 299 300 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 301 { 302 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 303 PetscErrorCode ierr; 304 305 PetscFunctionBegin; 306 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 307 PetscFunctionReturn(0); 308 } 309 if (!expansion) { /* prepare for the condensation step */ 310 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 311 /* allocate MUMPS internal array to store reduced right-hand sides */ 312 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 313 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 314 mumps->id.lredrhs = mumps->id.size_schur; 315 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 316 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 317 } 318 mumps->id.ICNTL(26) = 1; /* condensation phase */ 319 } else { /* prepare for the expansion step */ 320 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 321 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 322 mumps->id.ICNTL(26) = 2; /* expansion phase */ 323 PetscMUMPS_c(mumps); 324 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 325 /* restore defaults */ 326 mumps->id.ICNTL(26) = -1; 327 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 328 if (mumps->id.nrhs > 1) { 329 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 330 mumps->id.lredrhs = 0; 331 mumps->sizeredrhs = 0; 332 } 333 } 334 PetscFunctionReturn(0); 335 } 336 337 /* 338 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 339 340 input: 341 A - matrix in aij,baij or sbaij format 342 shift - 0: C style output triple; 1: Fortran style output triple. 343 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 344 MAT_REUSE_MATRIX: only the values in v array are updated 345 output: 346 nnz - dim of r, c, and v (number of local nonzero entries of A) 347 r, c, v - row and col index, matrix values (matrix triples) 348 349 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 350 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 351 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
352 353 */ 354 355 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 356 { 357 const PetscScalar *av; 358 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 359 PetscInt64 nz,rnz,i,j,k; 360 PetscErrorCode ierr; 361 PetscMUMPSInt *row,*col; 362 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 363 364 PetscFunctionBegin; 365 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 366 mumps->val = (PetscScalar*)av; 367 if (reuse == MAT_INITIAL_MATRIX) { 368 nz = aa->nz; 369 ai = aa->i; 370 aj = aa->j; 371 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 372 for (i=k=0; i<M; i++) { 373 rnz = ai[i+1] - ai[i]; 374 ajj = aj + ai[i]; 375 for (j=0; j<rnz; j++) { 376 ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr); 377 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr); 378 k++; 379 } 380 } 381 mumps->irn = row; 382 mumps->jcn = col; 383 mumps->nnz = nz; 384 } 385 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 390 { 391 PetscErrorCode ierr; 392 PetscInt64 nz,i,j,k,r; 393 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 394 PetscMUMPSInt *row,*col; 395 396 PetscFunctionBegin; 397 mumps->val = a->val; 398 if (reuse == MAT_INITIAL_MATRIX) { 399 nz = a->sliidx[a->totalslices]; 400 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 401 for (i=k=0; i<a->totalslices; i++) { 402 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 403 ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr); 404 } 405 } 406 for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);} 407 mumps->irn = row; 408 mumps->jcn = col; 409 mumps->nnz = nz; 410 } 411 PetscFunctionReturn(0); 412 } 413 414 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 415 { 416 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 417 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 418 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 419 PetscInt bs; 420 PetscErrorCode ierr; 421 PetscMUMPSInt *row,*col; 422 423 PetscFunctionBegin; 424 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 425 M = A->rmap->N/bs; 426 mumps->val = aa->a; 427 if (reuse == MAT_INITIAL_MATRIX) { 428 ai = aa->i; aj = aa->j; 429 nz = bs2*aa->nz; 430 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 431 for (i=0; i<M; i++) { 432 ajj = aj + ai[i]; 433 rnz = ai[i+1] - ai[i]; 434 for (k=0; k<rnz; k++) { 435 for (j=0; j<bs; j++) { 436 for (m=0; m<bs; m++) { 437 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 438 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 439 idx++; 440 } 441 } 442 } 443 } 444 mumps->irn = row; 445 mumps->jcn = col; 446 mumps->nnz = nz; 447 } 448 PetscFunctionReturn(0); 449 } 450 451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 452 { 453 const PetscInt *ai, *aj,*ajj; 454 PetscInt bs; 455 PetscInt64 nz,rnz,i,j,k,m; 456 PetscErrorCode ierr; 457 PetscMUMPSInt *row,*col; 458 PetscScalar *val; 459 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 460 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 461 #if defined(PETSC_USE_COMPLEX) 462 PetscBool hermitian; 463 #endif 464 465 PetscFunctionBegin; 466 #if defined(PETSC_USE_COMPLEX) 467 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 468 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for 
Choleksy"); 469 #endif 470 ai = aa->i; 471 aj = aa->j; 472 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 473 if (reuse == MAT_INITIAL_MATRIX) { 474 nz = aa->nz; 475 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 476 if (bs>1) { 477 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 478 mumps->val = mumps->val_alloc; 479 } else { 480 mumps->val = aa->a; 481 } 482 mumps->irn = row; 483 mumps->jcn = col; 484 } else { 485 if (bs == 1) mumps->val = aa->a; 486 row = mumps->irn; 487 col = mumps->jcn; 488 } 489 val = mumps->val; 490 491 nz = 0; 492 if (bs>1) { 493 for (i=0; i<mbs; i++) { 494 rnz = ai[i+1] - ai[i]; 495 ajj = aj + ai[i]; 496 for (j=0; j<rnz; j++) { 497 for (k=0; k<bs; k++) { 498 for (m=0; m<bs; m++) { 499 if (ajj[j]>i || k>=m) { 500 if (reuse == MAT_INITIAL_MATRIX) { 501 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 502 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 503 } 504 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 505 } 506 } 507 } 508 } 509 } 510 } else if (reuse == MAT_INITIAL_MATRIX) { 511 for (i=0; i<mbs; i++) { 512 rnz = ai[i+1] - ai[i]; 513 ajj = aj + ai[i]; 514 for (j=0; j<rnz; j++) { 515 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 516 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 517 nz++; 518 } 519 } 520 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 521 } 522 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 527 { 528 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 529 PetscInt64 nz,rnz,i,j; 530 const PetscScalar *av,*v1; 531 PetscScalar *val; 532 PetscErrorCode ierr; 533 PetscMUMPSInt *row,*col; 534 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 535 PetscBool missing; 536 #if defined(PETSC_USE_COMPLEX) 537 PetscBool hermitian; 538 #endif 539 540 PetscFunctionBegin; 541 #if defined(PETSC_USE_COMPLEX) 542 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 543 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 544 #endif 545 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 546 ai = aa->i; aj = aa->j; 547 adiag = aa->diag; 548 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr); 549 if (reuse == MAT_INITIAL_MATRIX) { 550 /* count nz in the upper triangular part of A */ 551 nz = 0; 552 if (missing) { 553 for (i=0; i<M; i++) { 554 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 555 for (j=ai[i];j<ai[i+1];j++) { 556 if (aj[j] < i) continue; 557 nz++; 558 } 559 } else { 560 nz += ai[i+1] - adiag[i]; 561 } 562 } 563 } else { 564 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 565 } 566 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 567 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 568 mumps->nnz = nz; 569 mumps->irn = row; 570 mumps->jcn = col; 571 mumps->val = mumps->val_alloc = val; 572 573 nz = 0; 574 if (missing) { 575 for (i=0; i<M; i++) { 576 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 577 for (j=ai[i];j<ai[i+1];j++) { 578 if (aj[j] < i) continue; 579 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 580 ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr); 581 val[nz] = av[j]; 582 nz++; 583 } 584 } else { 585 rnz = ai[i+1] - adiag[i]; 586 ajj = aj + adiag[i]; 587 v1 = av + adiag[i]; 588 for (j=0; j<rnz; j++) { 589 ierr = 
PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 590 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 591 val[nz++] = v1[j]; 592 } 593 } 594 } 595 } else { 596 for (i=0; i<M; i++) { 597 rnz = ai[i+1] - adiag[i]; 598 ajj = aj + adiag[i]; 599 v1 = av + adiag[i]; 600 for (j=0; j<rnz; j++) { 601 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 602 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 603 val[nz++] = v1[j]; 604 } 605 } 606 } 607 } else { 608 nz = 0; 609 val = mumps->val; 610 if (missing) { 611 for (i=0; i <M; i++) { 612 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 613 for (j=ai[i];j<ai[i+1];j++) { 614 if (aj[j] < i) continue; 615 val[nz++] = av[j]; 616 } 617 } else { 618 rnz = ai[i+1] - adiag[i]; 619 v1 = av + adiag[i]; 620 for (j=0; j<rnz; j++) { 621 val[nz++] = v1[j]; 622 } 623 } 624 } 625 } else { 626 for (i=0; i <M; i++) { 627 rnz = ai[i+1] - adiag[i]; 628 v1 = av + adiag[i]; 629 for (j=0; j<rnz; j++) { 630 val[nz++] = v1[j]; 631 } 632 } 633 } 634 } 635 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 640 { 641 PetscErrorCode ierr; 642 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 643 PetscInt bs; 644 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 645 PetscMUMPSInt *row,*col; 646 const PetscScalar *av,*bv,*v1,*v2; 647 PetscScalar *val; 648 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 649 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 650 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 651 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 652 #if defined(PETSC_USE_COMPLEX) 653 PetscBool hermitian; 654 #endif 655 656 PetscFunctionBegin; 657 #if defined(PETSC_USE_COMPLEX) 658 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 659 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 660 #endif 661 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 662 rstart = A->rmap->rstart; 663 ai = aa->i; 664 aj = aa->j; 665 bi = bb->i; 666 bj = bb->j; 667 av = aa->a; 668 bv = bb->a; 669 670 garray = mat->garray; 671 672 if (reuse == MAT_INITIAL_MATRIX) { 673 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 674 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 675 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 676 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 677 mumps->irn = row; 678 mumps->jcn = col; 679 mumps->val = mumps->val_alloc = val; 680 } else { 681 val = mumps->val; 682 } 683 684 jj = 0; irow = rstart; 685 for (i=0; i<mbs; i++) { 686 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 687 countA = ai[i+1] - ai[i]; 688 countB = bi[i+1] - bi[i]; 689 bjj = bj + bi[i]; 690 v1 = av + ai[i]*bs2; 691 v2 = bv + bi[i]*bs2; 692 693 if (bs>1) { 694 /* A-part */ 695 for (j=0; j<countA; j++) { 696 for (k=0; k<bs; k++) { 697 for (m=0; m<bs; m++) { 698 if (rstart + ajj[j]*bs>irow || k>=m) { 699 if (reuse == MAT_INITIAL_MATRIX) { 700 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 701 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 702 } 703 val[jj++] = v1[j*bs2 + m + k*bs]; 704 } 705 } 706 } 707 } 708 709 /* B-part */ 710 for (j=0; j < countB; j++) { 711 for (k=0; k<bs; k++) { 712 for (m=0; m<bs; m++) { 713 if (reuse == MAT_INITIAL_MATRIX) { 714 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 715 ierr = 
PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 716 } 717 val[jj++] = v2[j*bs2 + m + k*bs]; 718 } 719 } 720 } 721 } else { 722 /* A-part */ 723 for (j=0; j<countA; j++) { 724 if (reuse == MAT_INITIAL_MATRIX) { 725 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 726 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 727 } 728 val[jj++] = v1[j]; 729 } 730 731 /* B-part */ 732 for (j=0; j < countB; j++) { 733 if (reuse == MAT_INITIAL_MATRIX) { 734 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 735 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 736 } 737 val[jj++] = v2[j]; 738 } 739 } 740 irow+=bs; 741 } 742 mumps->nnz = jj; 743 PetscFunctionReturn(0); 744 } 745 746 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 747 { 748 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 749 PetscErrorCode ierr; 750 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 751 PetscMUMPSInt *row,*col; 752 const PetscScalar *av, *bv,*v1,*v2; 753 PetscScalar *val; 754 Mat Ad,Ao; 755 Mat_SeqAIJ *aa; 756 Mat_SeqAIJ *bb; 757 758 PetscFunctionBegin; 759 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 760 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 761 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 762 763 aa = (Mat_SeqAIJ*)(Ad)->data; 764 bb = (Mat_SeqAIJ*)(Ao)->data; 765 ai = aa->i; 766 aj = aa->j; 767 bi = bb->i; 768 bj = bb->j; 769 770 rstart = A->rmap->rstart; 771 772 if (reuse == MAT_INITIAL_MATRIX) { 773 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 774 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 775 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 776 mumps->nnz = nz; 777 mumps->irn = row; 778 mumps->jcn = col; 779 mumps->val = mumps->val_alloc = val; 780 } else { 781 val = mumps->val; 782 } 783 784 jj = 0; irow = rstart; 785 for (i=0; i<m; i++) { 786 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 787 countA = ai[i+1] - ai[i]; 788 countB = bi[i+1] - bi[i]; 789 bjj = bj + bi[i]; 790 v1 = av + ai[i]; 791 v2 = bv + bi[i]; 792 793 /* A-part */ 794 for (j=0; j<countA; j++) { 795 if (reuse == MAT_INITIAL_MATRIX) { 796 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 797 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 798 } 799 val[jj++] = v1[j]; 800 } 801 802 /* B-part */ 803 for (j=0; j < countB; j++) { 804 if (reuse == MAT_INITIAL_MATRIX) { 805 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 806 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 807 } 808 val[jj++] = v2[j]; 809 } 810 irow++; 811 } 812 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 813 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 818 { 819 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 820 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 821 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 822 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 823 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 824 const PetscInt bs2=mat->bs2; 825 PetscErrorCode ierr; 826 PetscInt bs; 827 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 828 PetscMUMPSInt *row,*col; 829 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 830 PetscScalar *val; 831 
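  /* As in the sequential BAIJ conversion above, every bs-by-bs block of the diagonal part (A) and of the
     off-diagonal part (B) is expanded into bs*bs point entries; garray[] translates B's local column
     indices into global column indices. */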
832 PetscFunctionBegin; 833 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 834 if (reuse == MAT_INITIAL_MATRIX) { 835 nz = bs2*(aa->nz + bb->nz); 836 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 837 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 838 mumps->nnz = nz; 839 mumps->irn = row; 840 mumps->jcn = col; 841 mumps->val = mumps->val_alloc = val; 842 } else { 843 val = mumps->val; 844 } 845 846 jj = 0; irow = rstart; 847 for (i=0; i<mbs; i++) { 848 countA = ai[i+1] - ai[i]; 849 countB = bi[i+1] - bi[i]; 850 ajj = aj + ai[i]; 851 bjj = bj + bi[i]; 852 v1 = av + bs2*ai[i]; 853 v2 = bv + bs2*bi[i]; 854 855 idx = 0; 856 /* A-part */ 857 for (k=0; k<countA; k++) { 858 for (j=0; j<bs; j++) { 859 for (n=0; n<bs; n++) { 860 if (reuse == MAT_INITIAL_MATRIX) { 861 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 862 ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr); 863 } 864 val[jj++] = v1[idx++]; 865 } 866 } 867 } 868 869 idx = 0; 870 /* B-part */ 871 for (k=0; k<countB; k++) { 872 for (j=0; j<bs; j++) { 873 for (n=0; n<bs; n++) { 874 if (reuse == MAT_INITIAL_MATRIX) { 875 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 876 ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr); 877 } 878 val[jj++] = v2[idx++]; 879 } 880 } 881 } 882 irow += bs; 883 } 884 PetscFunctionReturn(0); 885 } 886 887 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 888 { 889 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 890 PetscErrorCode ierr; 891 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 892 PetscMUMPSInt *row,*col; 893 const PetscScalar *av, *bv,*v1,*v2; 894 PetscScalar *val; 895 Mat Ad,Ao; 896 Mat_SeqAIJ *aa; 897 Mat_SeqAIJ *bb; 898 #if defined(PETSC_USE_COMPLEX) 899 PetscBool hermitian; 900 #endif 901 902 PetscFunctionBegin; 903 #if defined(PETSC_USE_COMPLEX) 904 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 905 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 906 #endif 907 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 908 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 909 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 910 911 aa = (Mat_SeqAIJ*)(Ad)->data; 912 bb = (Mat_SeqAIJ*)(Ao)->data; 913 ai = aa->i; 914 aj = aa->j; 915 adiag = aa->diag; 916 bi = bb->i; 917 bj = bb->j; 918 919 rstart = A->rmap->rstart; 920 921 if (reuse == MAT_INITIAL_MATRIX) { 922 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 923 nzb = 0; /* num of upper triangular entries in mat->B */ 924 for (i=0; i<m; i++) { 925 nza += (ai[i+1] - adiag[i]); 926 countB = bi[i+1] - bi[i]; 927 bjj = bj + bi[i]; 928 for (j=0; j<countB; j++) { 929 if (garray[bjj[j]] > rstart) nzb++; 930 } 931 } 932 933 nz = nza + nzb; /* total nz of upper triangular part of mat */ 934 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 935 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 936 mumps->nnz = nz; 937 mumps->irn = row; 938 mumps->jcn = col; 939 mumps->val = mumps->val_alloc = val; 940 } else { 941 val = mumps->val; 942 } 943 944 jj = 0; irow = rstart; 945 for (i=0; i<m; i++) { 946 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 947 v1 = av + adiag[i]; 948 countA = ai[i+1] - adiag[i]; 949 countB = bi[i+1] - bi[i]; 950 bjj = bj + bi[i]; 951 v2 = bv + bi[i]; 952 953 /* A-part */ 954 for (j=0; 
j<countA; j++) { 955 if (reuse == MAT_INITIAL_MATRIX) { 956 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 957 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 958 } 959 val[jj++] = v1[j]; 960 } 961 962 /* B-part */ 963 for (j=0; j < countB; j++) { 964 if (garray[bjj[j]] > rstart) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 967 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 968 } 969 val[jj++] = v2[j]; 970 } 971 } 972 irow++; 973 } 974 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 975 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 PetscErrorCode MatDestroy_MUMPS(Mat A) 980 { 981 PetscErrorCode ierr; 982 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 983 984 PetscFunctionBegin; 985 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 986 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 987 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 988 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 989 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 990 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 991 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 993 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 994 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 995 mumps->id.job = JOB_END; 996 PetscMUMPS_c(mumps); 997 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 998 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 999 if (mumps->use_petsc_omp_support) { 1000 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1001 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1002 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1003 } 1004 #endif 1005 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1006 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1007 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1008 ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1009 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1010 if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRMPI(ierr);} 1011 ierr = PetscFree(A->data);CHKERRQ(ierr); 1012 1013 /* clear composed functions */ 1014 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1015 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1016 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1017 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1018 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1025 ierr = 
PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1031 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1032 { 1033 PetscErrorCode ierr; 1034 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1035 const PetscMPIInt ompsize=mumps->omp_comm_size; 1036 PetscInt i,m,M,rstart; 1037 1038 PetscFunctionBegin; 1039 ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 1040 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 1041 if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1042 if (ompsize == 1) { 1043 if (!mumps->irhs_loc) { 1044 mumps->nloc_rhs = m; 1045 ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr); 1046 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1047 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1048 } 1049 mumps->id.rhs_loc = (MumpsScalar*)array; 1050 } else { 1051 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1052 const PetscInt *ranges; 1053 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1054 MPI_Group petsc_group,omp_group; 1055 PetscScalar *recvbuf=NULL; 1056 1057 if (mumps->is_omp_master) { 1058 /* Lazily initialize the omp stuff for distributed rhs */ 1059 if (!mumps->irhs_loc) { 1060 ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr); 1061 ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr); 1062 ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr); 1063 ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr); 1064 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1065 ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr); 1066 1067 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1068 mumps->nloc_rhs = 0; 1069 ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr); 1070 for (j=0; j<ompsize; j++) { 1071 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1072 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1073 } 1074 ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr); 1075 for (j=k=0; j<ompsize; j++) { 1076 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1077 } 1078 1079 ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr); 1080 ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr); 1081 ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr); 1082 } 1083 1084 /* Realloc buffers when current nrhs is bigger than what we have met */ 1085 if (nrhs > mumps->max_nrhs) { 1086 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1087 ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr); 1088 mumps->max_nrhs = nrhs; 1089 } 1090 1091 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1092 for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);} 1093 mumps->rhs_disps[0] = 0; 1094 for (j=1; j<ompsize; j++) { 1095 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1096 if (mumps->rhs_disps[j] < 0) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1097 } 1098 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1099 } 1100 1101 ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr); 1102 ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr); 1103 1104 if (mumps->is_omp_master) { 1105 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1106 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1107 for (j=0; j<ompsize; j++) { 1108 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1109 dst = dstbase; 1110 for (i=0; i<nrhs; i++) { 1111 ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr); 1112 src += mumps->rhs_nrow[j]; 1113 dst += mumps->nloc_rhs; 1114 } 1115 dstbase += mumps->rhs_nrow[j]; 1116 } 1117 } 1118 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1119 } 1120 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1121 } 1122 mumps->id.nrhs = nrhs; 1123 mumps->id.nloc_rhs = mumps->nloc_rhs; 1124 mumps->id.lrhs_loc = mumps->nloc_rhs; 1125 mumps->id.irhs_loc = mumps->irhs_loc; 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1130 { 1131 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1132 const PetscScalar *rarray = NULL; 1133 PetscScalar *array; 1134 IS is_iden,is_petsc; 1135 PetscErrorCode ierr; 1136 PetscInt i; 1137 PetscBool second_solve = PETSC_FALSE; 1138 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1139 1140 PetscFunctionBegin; 1141 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 1142 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 1143 1144 if (A->factorerrortype) { 1145 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1146 ierr = VecSetInf(x);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 mumps->id.nrhs = 1; 1151 if (mumps->petsc_size > 1) { 1152 if (mumps->ICNTL20 == 10) { 1153 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1154 ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr); 1155 ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr); 1156 } else { 1157 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1158 ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 if (!mumps->myid) { 1161 ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr); 1162 mumps->id.rhs = (MumpsScalar*)array; 1163 } 1164 } 1165 } else { /* petsc_size == 1 */ 1166 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1167 ierr = VecCopy(b,x);CHKERRQ(ierr); 1168 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 1169 mumps->id.rhs = (MumpsScalar*)array; 1170 } 1171 1172 /* 1173 handle condensation step of Schur complement (if any) 1174 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1175 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1176 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
1177 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1178 */ 1179 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1180 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1181 second_solve = PETSC_TRUE; 1182 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1183 } 1184 /* solve phase */ 1185 /*-------------*/ 1186 mumps->id.job = JOB_SOLVE; 1187 PetscMUMPS_c(mumps); 1188 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1189 1190 /* handle expansion step of Schur complement (if any) */ 1191 if (second_solve) { 1192 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1193 } 1194 1195 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1196 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1197 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1198 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1199 } 1200 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1201 PetscInt *isol2_loc=NULL; 1202 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 1203 ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr); 1204 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1205 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */ 1206 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 1207 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1208 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 1209 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1210 } 1211 1212 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 } 1215 1216 if (mumps->petsc_size > 1) { 1217 if (mumps->ICNTL20 == 10) { 1218 ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr); 1219 } else if (!mumps->myid) { 1220 ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr); 1221 } 1222 } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 1223 1224 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1229 { 1230 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 mumps->id.ICNTL(9) = 0; 1235 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1236 mumps->id.ICNTL(9) = 1; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1241 { 1242 PetscErrorCode ierr; 1243 Mat Bt = NULL; 1244 PetscBool denseX,denseB,flg,flgT; 1245 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1246 PetscInt i,nrhs,M; 1247 PetscScalar *array; 1248 const PetscScalar *rbray; 1249 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1250 PetscMUMPSInt *isol_loc,*isol_loc_save; 1251 PetscScalar *bray,*sol_loc,*sol_loc_save; 1252 IS is_to,is_from; 1253 PetscInt k,proc,j,m,myrstart; 1254 const PetscInt *rstart; 1255 Vec v_mpi,msol_loc; 1256 VecScatter scat_sol; 1257 Vec b_seq; 1258 VecScatter scat_rhs; 1259 
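  /* Two right-hand-side layouts are handled below: a dense B is passed to MUMPS directly (centralized, or
     distributed when ICNTL(20)=10), whereas a sparse B must arrive wrapped as a MATTRANSPOSEMAT because
     MUMPS wants sparse right-hand sides in compressed sparse column storage; see MatMatTransposeSolve_MUMPS(). */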
PetscScalar *aa; 1260 PetscInt spnr,*ia,*ja; 1261 Mat_MPIAIJ *b = NULL; 1262 1263 PetscFunctionBegin; 1264 ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1265 if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1266 1267 ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1268 if (denseB) { 1269 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1270 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1271 } else { /* sparse B */ 1272 if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1273 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1274 if (flgT) { /* input B is transpose of actural RHS matrix, 1275 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1276 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1277 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1278 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1279 } 1280 1281 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1282 mumps->id.nrhs = nrhs; 1283 mumps->id.lrhs = M; 1284 mumps->id.rhs = NULL; 1285 1286 if (mumps->petsc_size == 1) { 1287 PetscScalar *aa; 1288 PetscInt spnr,*ia,*ja; 1289 PetscBool second_solve = PETSC_FALSE; 1290 1291 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1292 mumps->id.rhs = (MumpsScalar*)array; 1293 1294 if (denseB) { 1295 /* copy B to X */ 1296 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1297 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1298 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1299 } else { /* sparse B */ 1300 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1301 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1302 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1303 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1304 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1305 } 1306 /* handle condensation step of Schur complement (if any) */ 1307 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1308 second_solve = PETSC_TRUE; 1309 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1310 } 1311 /* solve phase */ 1312 /*-------------*/ 1313 mumps->id.job = JOB_SOLVE; 1314 PetscMUMPS_c(mumps); 1315 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1316 1317 /* handle expansion step of Schur complement (if any) */ 1318 if (second_solve) { 1319 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1320 } 1321 if (!denseB) { /* sparse B */ 1322 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1323 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1324 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1325 } 1326 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! 
--------*/ 1331 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1332 1333 /* create msol_loc to hold mumps local solution */ 1334 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1335 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1336 1337 lsol_loc = mumps->id.lsol_loc; 1338 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1339 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1340 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1341 mumps->id.isol_loc = isol_loc; 1342 1343 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1344 1345 if (denseB) { 1346 if (mumps->ICNTL20 == 10) { 1347 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1348 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1349 ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr); 1350 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1351 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1352 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr); 1353 } else { 1354 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1355 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1356 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1357 0, re-arrange B into desired order, which is a local operation. 1358 */ 1359 1360 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1361 /* wrap dense rhs matrix B into a vector v_mpi */ 1362 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1363 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1364 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1365 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1366 1367 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1368 if (!mumps->myid) { 1369 PetscInt *idx; 1370 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1371 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1372 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1373 k = 0; 1374 for (proc=0; proc<mumps->petsc_size; proc++) { 1375 for (j=0; j<nrhs; j++) { 1376 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1377 } 1378 } 1379 1380 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1381 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1382 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1383 } else { 1384 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1385 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1386 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1387 } 1388 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1389 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1390 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1391 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1392 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1393 1394 if (!mumps->myid) { /* define rhs on the host */ 1395 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1396 mumps->id.rhs = (MumpsScalar*)bray; 1397 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1398 } 1399 } 1400 } else { /* sparse B */ 1401 b = (Mat_MPIAIJ*)Bt->data; 1402 1403 /* wrap dense X into a vector v_mpi */ 1404 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1405 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1406 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1407 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1408 1409 if (!mumps->myid) { 1410 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1411 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1412 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1413 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1414 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1415 } else { 1416 mumps->id.irhs_ptr = NULL; 1417 mumps->id.irhs_sparse = NULL; 1418 mumps->id.nz_rhs = 0; 1419 mumps->id.rhs_sparse = NULL; 1420 } 1421 } 1422 1423 /* solve phase */ 1424 /*-------------*/ 1425 mumps->id.job = JOB_SOLVE; 1426 PetscMUMPS_c(mumps); 1427 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1428 1429 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1430 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1431 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1432 1433 /* create scatter scat_sol */ 1434 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1435 /* iidx: index for scatter mumps solution to petsc X */ 1436 1437 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1438 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1439 for (i=0; i<lsol_loc; i++) { 1440 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1441 1442 for (proc=0; proc<mumps->petsc_size; proc++) { 1443 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1444 myrstart = rstart[proc]; 1445 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1446 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1447 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1448 break; 1449 } 1450 } 1451 1452 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1453 } 1454 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1455 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1456 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1458 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1459 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1460 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1461 1462 /* free spaces */ 1463 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1464 mumps->id.isol_loc = isol_loc_save; 1465 1466 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1467 ierr = PetscFree(idxx);CHKERRQ(ierr); 1468 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1469 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1470 if (!denseB) { 1471 if (!mumps->myid) { 1472 b = (Mat_MPIAIJ*)Bt->data; 1473 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1474 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1475 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1476 } 1477 } else { 1478 if (mumps->ICNTL20 == 0) { 1479 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1480 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1481 } 1482 } 1483 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1484 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1489 { 1490 PetscErrorCode ierr; 1491 PetscBool flg; 1492 Mat B; 1493 1494 PetscFunctionBegin; 1495 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1496 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1497 1498 /* Create B=Bt^T that uses Bt's data structure */ 1499 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1500 1501 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1502 ierr = MatDestroy(&B);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #if !defined(PETSC_USE_COMPLEX) 1507 /* 1508 input: 1509 F: numeric factor 1510 output: 1511 nneg: total number of negative pivots 1512 nzero: total number of zero pivots 1513 npos: (global dimension of F) - nneg - nzero 1514 */ 1515 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1516 { 1517 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1518 PetscErrorCode ierr; 1519 PetscMPIInt size; 1520 1521 PetscFunctionBegin; 1522 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr); 1523 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. 
Set ICNTL(13)=1 to skip ScaLAPACK */ 1524 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13)); 1525 1526 if (nneg) *nneg = mumps->id.INFOG(12); 1527 if (nzero || npos) { 1528 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1529 if (nzero) *nzero = mumps->id.INFOG(28); 1530 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 #endif 1535 1536 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1537 { 1538 PetscErrorCode ierr; 1539 PetscInt i,nreqs; 1540 PetscMUMPSInt *irn,*jcn; 1541 PetscMPIInt count; 1542 PetscInt64 totnnz,remain; 1543 const PetscInt osize=mumps->omp_comm_size; 1544 PetscScalar *val; 1545 1546 PetscFunctionBegin; 1547 if (osize > 1) { 1548 if (reuse == MAT_INITIAL_MATRIX) { 1549 /* master first gathers counts of nonzeros to receive */ 1550 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1551 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr); 1552 1553 /* Then each computes number of send/recvs */ 1554 if (mumps->is_omp_master) { 1555 /* Start from 1 since self communication is not done in MPI */ 1556 nreqs = 0; 1557 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1558 } else { 1559 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1560 } 1561 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */ 1562 1563 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1564 MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz 1565 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1566 is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1567 */ 1568 nreqs = 0; /* counter for actual send/recvs */ 1569 if (mumps->is_omp_master) { 1570 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1571 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1573 1574 /* Self communication */ 1575 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1576 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1577 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1578 1579 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1580 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1581 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1582 mumps->nnz = totnnz; 1583 mumps->irn = irn; 1584 mumps->jcn = jcn; 1585 mumps->val = mumps->val_alloc = val; 1586 1587 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1588 jcn += mumps->recvcount[0]; 1589 val += mumps->recvcount[0]; 1590 1591 /* Remote communication */ 1592 for (i=1; i<osize; i++) { 1593 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1594 remain = mumps->recvcount[i] - count; 1595 while (count>0) { 1596 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1597 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1598 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1599 irn += count; 1600 jcn += count; 1601 val += count; 1602 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1603 remain -= count; 1604 } 1605 } 1606 } else { 1607 irn = mumps->irn; 1608 jcn = mumps->jcn; 1609 val = mumps->val; 1610 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1611 remain = mumps->nnz - count; 1612 while (count>0) { 1613 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1614 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1615 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1616 irn += count; 1617 jcn += count; 1618 val += count; 1619 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1620 remain -= count; 1621 } 1622 } 1623 } else { 1624 nreqs = 0; 1625 if (mumps->is_omp_master) { 1626 val = mumps->val + mumps->recvcount[0]; 1627 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1628 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1629 remain = mumps->recvcount[i] - count; 1630 while (count>0) { 1631 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1632 val += count; 1633 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1634 remain -= count; 1635 } 1636 } 1637 } else { 1638 val = mumps->val; 1639 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1640 remain = mumps->nnz - count; 1641 while (count>0) { 1642 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1643 val += count; 1644 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1645 remain -= count; 1646 } 1647 } 1648 } 1649 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 1650 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1651 } 1652 PetscFunctionReturn(0); 1653 } 1654 1655 PetscErrorCode 
MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1656 { 1657 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1658 PetscErrorCode ierr; 1659 PetscBool isMPIAIJ; 1660 1661 PetscFunctionBegin; 1662 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip numeric factorization unless analysis only flagged the benign empty-matrix case */ 1663 if (mumps->id.INFOG(1) == -6) { 1664 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1665 } 1666 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1671 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1672 1673 /* numerical factorization phase */ 1674 /*-------------------------------*/ 1675 mumps->id.job = JOB_FACTNUMERIC; 1676 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1677 if (!mumps->myid) { 1678 mumps->id.a = (MumpsScalar*)mumps->val; 1679 } 1680 } else { 1681 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1682 } 1683 PetscMUMPS_c(mumps); 1684 if (mumps->id.INFOG(1) < 0) { 1685 if (A->erroriffailure) { 1686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1687 } else { 1688 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1689 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1690 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1691 } else if (mumps->id.INFOG(1) == -13) { 1692 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1693 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1694 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1695 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1696 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1697 } else { 1698 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1699 F->factorerrortype = MAT_FACTOR_OTHER; 1700 } 1701 } 1702 } 1703 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1704 1705 F->assembled = PETSC_TRUE; 1706 mumps->matstruc = SAME_NONZERO_PATTERN; 1707 if (F->schur) { /* reset Schur status to unfactored */ 1708 #if defined(PETSC_HAVE_CUDA) 1709 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1710 #endif 1711 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1712 mumps->id.ICNTL(19) = 2; 1713 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1714 } 1715 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1716 } 1717 1718 /* just to be sure that the ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1719 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1720 1721 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1722 if (mumps->petsc_size > 1) { 1723
PetscInt lsol_loc; 1724 PetscScalar *sol_loc; 1725 1726 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1727 1728 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1729 if (mumps->x_seq) { 1730 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1731 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1732 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1733 } 1734 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1735 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1736 mumps->id.lsol_loc = lsol_loc; 1737 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1738 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1739 } 1740 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 /* Sets MUMPS options from the options database */ 1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1746 { 1747 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1748 PetscErrorCode ierr; 1749 PetscMUMPSInt icntl=0; 1750 PetscInt info[80],i,ninfo=80; 1751 PetscBool flg=PETSC_FALSE; 1752 1753 PetscFunctionBegin; 1754 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1755 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1756 if (flg) mumps->id.ICNTL(1) = icntl; 1757 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1758 if (flg) mumps->id.ICNTL(2) = icntl; 1759 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1760 if (flg) mumps->id.ICNTL(3) = icntl; 1761 1762 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1763 if (flg) mumps->id.ICNTL(4) = icntl; 1764 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1765 1766 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1767 if (flg) mumps->id.ICNTL(6) = icntl; 1768 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) { 1771 if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto\n"); 1772 mumps->id.ICNTL(7) = icntl; 1773 } 1774 1775 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1776 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1777 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1778 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1783 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1784 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1785 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1786 } 1787 1788 /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake. 1789 So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1790 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1791 */ 1792 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 1793 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1794 #else 1795 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1796 #endif 1797 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr); 1798 if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10\n",(int)mumps->ICNTL20); 1799 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1800 if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n"); 1801 #endif 1802 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1803 1804 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1805 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1806 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1807 if (mumps->id.ICNTL(24)) { 1808 mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ 1809 } 1810 1811 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1812 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1813 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1814 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1816 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1818 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1819 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1821 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with
BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1823 1824 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1826 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1827 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1829 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1830 1831 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1832 1833 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1834 if (ninfo) { 1835 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1836 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1837 mumps->ninfo = ninfo; 1838 for (i=0; i<ninfo; i++) { 1839 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1840 else mumps->info[i] = info[i]; 1841 } 1842 } 1843 1844 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1849 { 1850 PetscErrorCode ierr; 1851 PetscInt nthreads=0; 1852 MPI_Comm newcomm=MPI_COMM_NULL; 1853 1854 PetscFunctionBegin; 1855 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1856 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr); 1857 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */ 1858 1859 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1860 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1861 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1862 if (mumps->use_petsc_omp_support) { 1863 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1864 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1865 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1866 #else 1867 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. 
Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1868 #endif 1869 } else { 1870 mumps->omp_comm = PETSC_COMM_SELF; 1871 mumps->mumps_comm = mumps->petsc_comm; 1872 mumps->is_omp_master = PETSC_TRUE; 1873 } 1874 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr); 1875 mumps->reqs = NULL; 1876 mumps->tag = 0; 1877 1878 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 1879 if (mumps->mumps_comm != MPI_COMM_NULL) { 1880 ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRMPI(ierr); 1881 mumps->mumps_comm = newcomm; 1882 } 1883 1884 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1885 mumps->id.job = JOB_INIT; 1886 mumps->id.par = 1; /* host participates in factorization and solve */ 1887 mumps->id.sym = mumps->sym; 1888 1889 PetscMUMPS_c(mumps); 1890 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1891 1892 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1893 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1894 */ 1895 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRMPI(ierr); 1896 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr); 1897 1898 mumps->scat_rhs = NULL; 1899 mumps->scat_sol = NULL; 1900 1901 /* set PETSc-MUMPS default options - override MUMPS default */ 1902 mumps->id.ICNTL(3) = 0; 1903 mumps->id.ICNTL(4) = 0; 1904 if (mumps->petsc_size == 1) { 1905 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1906 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1907 } else { 1908 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1909 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1910 } 1911 1912 /* schur */ 1913 mumps->id.size_schur = 0; 1914 mumps->id.listvar_schur = NULL; 1915 mumps->id.schur = NULL; 1916 mumps->sizeredrhs = 0; 1917 mumps->schur_sol = NULL; 1918 mumps->schur_sizesol = 0; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1923 { 1924 PetscErrorCode ierr; 1925 1926 PetscFunctionBegin; 1927 if (mumps->id.INFOG(1) < 0) { 1928 if (A->erroriffailure) { 1929 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1930 } else { 1931 if (mumps->id.INFOG(1) == -6) { 1932 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1933 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1934 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1935 ierr = PetscInfo2(F,"problem with workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1936 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1937 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* empty matrix (N=0) */ 1938 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1939 } else { 1940 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1941 F->factorerrortype = MAT_FACTOR_OTHER; 1942 } 1943 } 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948
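/*
   A minimal usage sketch (assumptions: the caller has already assembled a MATAIJ matrix A and vectors b and x;
   the runtime-options route at the end is the usual PETSc idiom and is not enforced by this file). It shows how
   an application reaches the symbolic/numeric/solve routines defined here, and how MatMumpsSetIcntl()/MatMumpsSetCntl()
   map onto the MUMPS ICNTL()/CNTL() entries read in PetscSetMUMPSFromOptions() above.

     Mat            A,F;
     Vec            b,x;
     IS             rperm,cperm;
     MatFactorInfo  info;
     PetscErrorCode ierr;

     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatMumpsSetIcntl(F,7,2);CHKERRQ(ierr);      // e.g. ICNTL(7)=2: AMF ordering
     ierr = MatMumpsSetCntl(F,1,0.01);CHKERRQ(ierr);    // CNTL(1): relative pivoting threshold
     ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rperm,&cperm);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,rperm,cperm,&info);CHKERRQ(ierr);  // dispatches to MatLUFactorSymbolic_AIJMUMPS() below
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);               // dispatches to MatFactorNumeric_MUMPS() above
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);

   The same solver can be selected through a KSP at runtime, e.g.
     -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2 -mat_mumps_cntl_1 0.01
*/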
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1949 { 1950 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1951 PetscErrorCode ierr; 1952 Vec b; 1953 const PetscInt M = A->rmap->N; 1954 1955 PetscFunctionBegin; 1956 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1957 1958 /* Set MUMPS options from the options database */ 1959 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1960 1961 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1962 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1963 1964 /* analysis phase */ 1965 /*----------------*/ 1966 mumps->id.job = JOB_FACTSYMBOLIC; 1967 mumps->id.n = M; 1968 switch (mumps->id.ICNTL(18)) { 1969 case 0: /* centralized assembled matrix input */ 1970 if (!mumps->myid) { 1971 mumps->id.nnz = mumps->nnz; 1972 mumps->id.irn = mumps->irn; 1973 mumps->id.jcn = mumps->jcn; 1974 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1975 if (r) { 1976 mumps->id.ICNTL(7) = 1; 1977 if (!mumps->myid) { 1978 const PetscInt *idx; 1979 PetscInt i; 1980 1981 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1982 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1983 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1984 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1985 } 1986 } 1987 } 1988 break; 1989 case 3: /* distributed assembled matrix input (size>1) */ 1990 mumps->id.nnz_loc = mumps->nnz; 1991 mumps->id.irn_loc = mumps->irn; 1992 mumps->id.jcn_loc = mumps->jcn; 1993 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1994 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1995 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1996 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1997 ierr = VecDestroy(&b);CHKERRQ(ierr); 1998 } 1999 break; 2000 } 2001 PetscMUMPS_c(mumps); 2002 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2003 2004 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2005 F->ops->solve = MatSolve_MUMPS; 2006 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2007 F->ops->matsolve = MatMatSolve_MUMPS; 2008 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2009 PetscFunctionReturn(0); 2010 } 2011 2012 /* Note the Petsc r and c permutations are ignored */ 2013 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2014 { 2015 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2016 PetscErrorCode ierr; 2017 Vec b; 2018 const PetscInt M = A->rmap->N; 2019 2020 PetscFunctionBegin; 2021 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2022 2023 /* Set MUMPS options from the options database */ 2024 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2025 2026 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2027 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2028 2029 /* analysis phase */ 2030 /*----------------*/ 2031 mumps->id.job = JOB_FACTSYMBOLIC; 2032 mumps->id.n = M; 2033 switch (mumps->id.ICNTL(18)) { 2034 case 0: /* centralized assembled matrix input */ 2035 if (!mumps->myid) { 2036 mumps->id.nnz = mumps->nnz; 2037 mumps->id.irn = mumps->irn; 2038 mumps->id.jcn = mumps->jcn; 2039 if (mumps->id.ICNTL(6)>1) { 2040 mumps->id.a = (MumpsScalar*)mumps->val; 2041 } 2042 } 2043 break; 2044 case 3: /* distributed assembled matrix 
input (size>1) */ 2045 mumps->id.nnz_loc = mumps->nnz; 2046 mumps->id.irn_loc = mumps->irn; 2047 mumps->id.jcn_loc = mumps->jcn; 2048 if (mumps->id.ICNTL(6)>1) { 2049 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2050 } 2051 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2052 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2053 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2054 ierr = VecDestroy(&b);CHKERRQ(ierr); 2055 } 2056 break; 2057 } 2058 PetscMUMPS_c(mumps); 2059 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2060 2061 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2062 F->ops->solve = MatSolve_MUMPS; 2063 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2064 PetscFunctionReturn(0); 2065 } 2066 2067 /* Note the Petsc r permutation and factor info are ignored */ 2068 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2069 { 2070 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2071 PetscErrorCode ierr; 2072 Vec b; 2073 const PetscInt M = A->rmap->N; 2074 2075 PetscFunctionBegin; 2076 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2077 2078 /* Set MUMPS options from the options database */ 2079 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2080 2081 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2082 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2083 2084 /* analysis phase */ 2085 /*----------------*/ 2086 mumps->id.job = JOB_FACTSYMBOLIC; 2087 mumps->id.n = M; 2088 switch (mumps->id.ICNTL(18)) { 2089 case 0: /* centralized assembled matrix input */ 2090 if (!mumps->myid) { 2091 mumps->id.nnz = mumps->nnz; 2092 mumps->id.irn = mumps->irn; 2093 mumps->id.jcn = mumps->jcn; 2094 if (mumps->id.ICNTL(6)>1) { 2095 mumps->id.a = (MumpsScalar*)mumps->val; 2096 } 2097 } 2098 break; 2099 case 3: /* distributed assembled matrix input (size>1) */ 2100 mumps->id.nnz_loc = mumps->nnz; 2101 mumps->id.irn_loc = mumps->irn; 2102 mumps->id.jcn_loc = mumps->jcn; 2103 if (mumps->id.ICNTL(6)>1) { 2104 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2105 } 2106 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2107 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2108 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2109 ierr = VecDestroy(&b);CHKERRQ(ierr); 2110 } 2111 break; 2112 } 2113 PetscMUMPS_c(mumps); 2114 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2115 2116 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2117 F->ops->solve = MatSolve_MUMPS; 2118 F->ops->solvetranspose = MatSolve_MUMPS; 2119 F->ops->matsolve = MatMatSolve_MUMPS; 2120 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2121 #if defined(PETSC_USE_COMPLEX) 2122 F->ops->getinertia = NULL; 2123 #else 2124 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2125 #endif 2126 PetscFunctionReturn(0); 2127 } 2128 2129 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2130 { 2131 PetscErrorCode ierr; 2132 PetscBool iascii; 2133 PetscViewerFormat format; 2134 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2135 2136 PetscFunctionBegin; 2137 /* check if matrix is mumps type */ 2138 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2139 2140 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2141 if (iascii) { 2142 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2143 if (format == PETSC_VIEWER_ASCII_INFO) { 2144 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2145 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym);CHKERRQ(ierr); 2146 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par);CHKERRQ(ierr); 2147 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2149 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2150 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2151 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2155 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2157 if (mumps->id.ICNTL(11)>0) { 2158 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2159 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2160 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2161 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2164 } 2165 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2168 /* ICNTL(15-17) not used */ 2169 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2175 2176 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2177 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2181 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2182 2183 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2184 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2185 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2186 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2189 2190 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2191 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2192 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2193 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2194 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2195 ierr = PetscViewerASCIIPrintf(viewer," 
CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2196 2197 /* infomation local to each processor */ 2198 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2199 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2200 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2201 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2202 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2203 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2204 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2205 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2206 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2207 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2208 2209 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2210 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2211 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2212 2213 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2214 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2215 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2216 2217 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2218 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2219 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2220 2221 if (mumps->ninfo && mumps->ninfo <= 80) { 2222 PetscInt i; 2223 for (i=0; i<mumps->ninfo; i++) { 2224 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2225 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2226 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2227 } 2228 } 2229 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2230 2231 if (!mumps->myid) { /* information from the host */ 2232 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2233 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2234 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2235 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2236 2237 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr); 2238 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after 
analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr); 2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr); 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr); 2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr); 2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2260 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2267 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr); 2268 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr); 2270 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr); 2271 } 2272 } 2273 } 2274 PetscFunctionReturn(0); 2275 } 2276 2277 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2278 { 2279 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2280 2281 PetscFunctionBegin; 2282 info->block_size = 1.0; 2283 info->nz_allocated = mumps->id.INFOG(20); 2284 info->nz_used = mumps->id.INFOG(20); 2285 info->nz_unneeded = 0.0; 2286 info->assemblies = 0.0; 2287 info->mallocs = 0.0; 2288 info->memory = 0.0; 2289 info->fill_ratio_given = 0; 2290 info->fill_ratio_needed = 0; 2291 info->factor_mallocs = 0; 2292 PetscFunctionReturn(0); 2293 } 2294 2295 /* -------------------------------------------------------------------------------------------*/ 2296 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2297 { 2298 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2299 const PetscScalar *arr; 2300 const PetscInt *idxs; 2301 PetscInt size,i; 2302 PetscErrorCode ierr; 2303 2304 PetscFunctionBegin; 2305 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2306 if (mumps->petsc_size > 1) { 2307 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2308 2309 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2310 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr); 2311 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 2312 } 2313 2314 /* Schur complement matrix */ 2315 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2316 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2317 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2318 mumps->id.schur = (MumpsScalar*)arr; 2319 mumps->id.size_schur = size; 2320 mumps->id.schur_lld = size; 2321 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2322 if (mumps->sym == 1) { 2323 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2324 } 2325 2326 /* MUMPS expects Fortran style indices */ 2327 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2328 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2329 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2330 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2331 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2332 if (mumps->petsc_size > 1) { 2333 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2334 } else { 2335 if (F->factortype == MAT_FACTOR_LU) { 2336 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2337 } else { 2338 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2339 } 2340 } 2341 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2342 mumps->id.ICNTL(26) = -1; 2343 PetscFunctionReturn(0); 2344 } 2345 2346 /* -------------------------------------------------------------------------------------------*/ 2347 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2348 { 2349 Mat St; 2350 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2351 PetscScalar *array; 2352 #if defined(PETSC_USE_COMPLEX) 2353 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2354 #endif 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2359 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2360 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2361 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2362 ierr = MatSetUp(St);CHKERRQ(ierr); 2363 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2364 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2365 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2366 PetscInt i,j,N=mumps->id.size_schur; 2367 for (i=0;i<N;i++) { 2368 for (j=0;j<N;j++) { 2369 #if !defined(PETSC_USE_COMPLEX) 2370 PetscScalar val = mumps->id.schur[i*N+j]; 2371 #else 2372 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2373 #endif 2374 array[j*N+i] = val; 2375 } 2376 } 2377 } else { /* stored by columns */ 2378 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2379 } 2380 } else { /* either full or lower-triangular (not packed) */ 2381 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2382 PetscInt i,j,N=mumps->id.size_schur; 2383 for (i=0;i<N;i++) { 2384 for (j=i;j<N;j++) { 2385 #if !defined(PETSC_USE_COMPLEX) 2386 PetscScalar val = mumps->id.schur[i*N+j]; 2387 #else 2388 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2389 #endif 2390 array[i*N+j] = val; 2391 array[j*N+i] = val; 2392 } 2393 } 2394 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2395 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2396 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2397 PetscInt i,j,N=mumps->id.size_schur; 2398 for (i=0;i<N;i++) { 2399 for (j=0;j<i+1;j++) { 2400 #if !defined(PETSC_USE_COMPLEX) 2401 PetscScalar val = mumps->id.schur[i*N+j]; 2402 #else 2403 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2404 #endif 2405 array[i*N+j] = val; 2406 array[j*N+i] = val; 2407 } 2408 } 2409 } 2410 } 2411 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2412 *S = St; 2413 PetscFunctionReturn(0); 2414 } 2415 2416 /* -------------------------------------------------------------------------------------------*/ 2417 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2418 { 2419 PetscErrorCode ierr; 2420 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2421 2422 PetscFunctionBegin; 2423 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2428 { 2429 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2430 2431 PetscFunctionBegin; 2432 *ival = mumps->id.ICNTL(icntl); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 /*@ 2437 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2438 2439 Logically Collective on Mat 2440 2441 Input Parameters: 2442 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2443 . icntl - index of MUMPS parameter array ICNTL() 2444 - ival - value of MUMPS ICNTL(icntl) 2445 2446 Options Database: 2447 . -mat_mumps_icntl_<icntl> <ival> 2448 2449 Level: beginner 2450 2451 References: 2452 . 
MUMPS Users' Guide 2453 2454 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2455 @*/ 2456 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2457 { 2458 PetscErrorCode ierr; 2459 2460 PetscFunctionBegin; 2461 PetscValidType(F,1); 2462 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2463 PetscValidLogicalCollectiveInt(F,icntl,2); 2464 PetscValidLogicalCollectiveInt(F,ival,3); 2465 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 /*@ 2470 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2471 2472 Logically Collective on Mat 2473 2474 Input Parameters: 2475 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2476 - icntl - index of MUMPS parameter array ICNTL() 2477 2478 Output Parameter: 2479 . ival - value of MUMPS ICNTL(icntl) 2480 2481 Level: beginner 2482 2483 References: 2484 . MUMPS Users' Guide 2485 2486 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2487 @*/ 2488 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2489 { 2490 PetscErrorCode ierr; 2491 2492 PetscFunctionBegin; 2493 PetscValidType(F,1); 2494 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2495 PetscValidLogicalCollectiveInt(F,icntl,2); 2496 PetscValidIntPointer(ival,3); 2497 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 /* -------------------------------------------------------------------------------------------*/ 2502 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2503 { 2504 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2505 2506 PetscFunctionBegin; 2507 mumps->id.CNTL(icntl) = val; 2508 PetscFunctionReturn(0); 2509 } 2510 2511 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2512 { 2513 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2514 2515 PetscFunctionBegin; 2516 *val = mumps->id.CNTL(icntl); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 /*@ 2521 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2522 2523 Logically Collective on Mat 2524 2525 Input Parameters: 2526 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2527 . icntl - index of MUMPS parameter array CNTL() 2528 - val - value of MUMPS CNTL(icntl) 2529 2530 Options Database: 2531 . -mat_mumps_cntl_<icntl> <val> 2532 2533 Level: beginner 2534 2535 References: 2536 . 
MUMPS Users' Guide 2537 2538 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2539 @*/ 2540 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2541 { 2542 PetscErrorCode ierr; 2543 2544 PetscFunctionBegin; 2545 PetscValidType(F,1); 2546 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2547 PetscValidLogicalCollectiveInt(F,icntl,2); 2548 PetscValidLogicalCollectiveReal(F,val,3); 2549 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 /*@ 2554 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2555 2556 Logically Collective on Mat 2557 2558 Input Parameters: 2559 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2560 - icntl - index of MUMPS parameter array CNTL() 2561 2562 Output Parameter: 2563 . val - value of MUMPS CNTL(icntl) 2564 2565 Level: beginner 2566 2567 References: 2568 . MUMPS Users' Guide 2569 2570 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2571 @*/ 2572 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2573 { 2574 PetscErrorCode ierr; 2575 2576 PetscFunctionBegin; 2577 PetscValidType(F,1); 2578 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2579 PetscValidLogicalCollectiveInt(F,icntl,2); 2580 PetscValidRealPointer(val,3); 2581 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2586 { 2587 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2588 2589 PetscFunctionBegin; 2590 *info = mumps->id.INFO(icntl); 2591 PetscFunctionReturn(0); 2592 } 2593 2594 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2595 { 2596 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2597 2598 PetscFunctionBegin; 2599 *infog = mumps->id.INFOG(icntl); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2604 { 2605 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2606 2607 PetscFunctionBegin; 2608 *rinfo = mumps->id.RINFO(icntl); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2613 { 2614 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2615 2616 PetscFunctionBegin; 2617 *rinfog = mumps->id.RINFOG(icntl); 2618 PetscFunctionReturn(0); 2619 } 2620 2621 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2622 { 2623 PetscErrorCode ierr; 2624 Mat Bt = NULL,Btseq = NULL; 2625 PetscBool flg; 2626 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2627 PetscScalar *aa; 2628 PetscInt spnr,*ia,*ja,M,nrhs; 2629 2630 PetscFunctionBegin; 2631 PetscValidPointer(spRHS,2); 2632 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2633 if (flg) { 2634 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2635 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2636 2637 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2638 2639 if (mumps->petsc_size > 1) { 2640 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2641 Btseq = 
b->A; 2642 } else { 2643 Btseq = Bt; 2644 } 2645 2646 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2647 mumps->id.nrhs = nrhs; 2648 mumps->id.lrhs = M; 2649 mumps->id.rhs = NULL; 2650 2651 if (!mumps->myid) { 2652 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2653 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2654 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2655 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2656 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2657 } else { 2658 mumps->id.irhs_ptr = NULL; 2659 mumps->id.irhs_sparse = NULL; 2660 mumps->id.nz_rhs = 0; 2661 mumps->id.rhs_sparse = NULL; 2662 } 2663 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2664 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2665 2666 /* solve phase */ 2667 /*-------------*/ 2668 mumps->id.job = JOB_SOLVE; 2669 PetscMUMPS_c(mumps); 2670 if (mumps->id.INFOG(1) < 0) 2671 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2672 2673 if (!mumps->myid) { 2674 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2675 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2676 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2677 } 2678 PetscFunctionReturn(0); 2679 } 2680 2681 /*@ 2682 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2683 2684 Logically Collective on Mat 2685 2686 Input Parameters: 2687 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2688 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2689 2690 Output Parameter: 2691 . spRHS - requested entries of inverse of A 2692 2693 Level: beginner 2694 2695 References: 2696 . MUMPS Users' Guide 2697 2698 .seealso: MatGetFactor(), MatCreateTranspose() 2699 @*/ 2700 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2701 { 2702 PetscErrorCode ierr; 2703 2704 PetscFunctionBegin; 2705 PetscValidType(F,1); 2706 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2707 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2708 PetscFunctionReturn(0); 2709 } 2710 2711 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2712 { 2713 PetscErrorCode ierr; 2714 Mat spRHS; 2715 2716 PetscFunctionBegin; 2717 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2718 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2719 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 /*@ 2724 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2725 2726 Logically Collective on Mat 2727 2728 Input Parameters: 2729 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2730 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2731 2732 Output Parameter: 2733 . spRHST - requested entries of inverse of A^T 2734 2735 Level: beginner 2736 2737 References: 2738 . 
MUMPS Users' Guide 2739 2740 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2741 @*/ 2742 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2743 { 2744 PetscErrorCode ierr; 2745 PetscBool flg; 2746 2747 PetscFunctionBegin; 2748 PetscValidType(F,1); 2749 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2750 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2751 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2752 2753 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2754 PetscFunctionReturn(0); 2755 } 2756 2757 /*@ 2758 MatMumpsGetInfo - Get MUMPS parameter INFO() 2759 2760 Logically Collective on Mat 2761 2762 Input Parameters: 2763 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2764 - icntl - index of MUMPS parameter array INFO() 2765 2766 Output Parameter: 2767 . ival - value of MUMPS INFO(icntl) 2768 2769 Level: beginner 2770 2771 References: 2772 . MUMPS Users' Guide 2773 2774 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2775 @*/ 2776 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2777 { 2778 PetscErrorCode ierr; 2779 2780 PetscFunctionBegin; 2781 PetscValidType(F,1); 2782 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2783 PetscValidIntPointer(ival,3); 2784 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2785 PetscFunctionReturn(0); 2786 } 2787 2788 /*@ 2789 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2790 2791 Logically Collective on Mat 2792 2793 Input Parameters: 2794 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2795 - icntl - index of MUMPS parameter array INFOG() 2796 2797 Output Parameter: 2798 . ival - value of MUMPS INFOG(icntl) 2799 2800 Level: beginner 2801 2802 References: 2803 . MUMPS Users' Guide 2804 2805 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2806 @*/ 2807 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2808 { 2809 PetscErrorCode ierr; 2810 2811 PetscFunctionBegin; 2812 PetscValidType(F,1); 2813 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2814 PetscValidIntPointer(ival,3); 2815 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2816 PetscFunctionReturn(0); 2817 } 2818 2819 /*@ 2820 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2821 2822 Logically Collective on Mat 2823 2824 Input Parameters: 2825 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2826 - icntl - index of MUMPS parameter array RINFO() 2827 2828 Output Parameter: 2829 . val - value of MUMPS RINFO(icntl) 2830 2831 Level: beginner 2832 2833 References: 2834 . 
MUMPS Users' Guide 2835 2836 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2837 @*/ 2838 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2839 { 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 PetscValidType(F,1); 2844 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2845 PetscValidRealPointer(val,3); 2846 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 2850 /*@ 2851 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2852 2853 Logically Collective on Mat 2854 2855 Input Parameters: 2856 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2857 - icntl - index of MUMPS parameter array RINFOG() 2858 2859 Output Parameter: 2860 . val - value of MUMPS RINFOG(icntl) 2861 2862 Level: beginner 2863 2864 References: 2865 . MUMPS Users' Guide 2866 2867 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2868 @*/ 2869 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2870 { 2871 PetscErrorCode ierr; 2872 2873 PetscFunctionBegin; 2874 PetscValidType(F,1); 2875 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2876 PetscValidRealPointer(val,3); 2877 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2878 PetscFunctionReturn(0); 2879 } 2880 2881 /*MC 2882 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2883 distributed and sequential matrices via the external package MUMPS. 2884 2885 Works with MATAIJ and MATSBAIJ matrices 2886 2887 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2888 2889 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2890 2891 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2892 2893 Options Database Keys: 2894 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2895 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2896 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2897 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2898 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2899 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2900 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2901 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2902 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2903 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2904 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2905 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2906 . 
-mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
.  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
.  -mat_mumps_icntl_20 - ICNTL(20): gives MUMPS a centralized (0) or distributed (10) dense RHS
.  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of iterative refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
   The default might be the number of cores per CPU package (socket) as reported by hwloc, which is also the value suggested by the MUMPS manual.

 Level: beginner

 Notes:
 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so attempting to use it on a Hermitian matrix generates an error.

 When a MUMPS factorization fails inside a KSP solve, for example with KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$     KSPGetPC(ksp,&pc);
$     PCFactorGetMatrix(pc,&mat);
$     MatMumpsGetInfo(mat,....);
$     MatMumpsGetInfog(mat,....); etc.
 Alternatively, run with -ksp_error_if_not_converged and the program will stop with the MUMPS information printed in the error message.
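 The ICNTL()/CNTL() parameters can also be set from code instead of through the options database. Below is a minimal sketch (error checking omitted); it assumes ksp is an already configured KSP, and ICNTL(14)=30 is only an illustrative value:
$     KSPGetPC(ksp,&pc);
$     PCSetType(pc,PCLU);
$     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$     PCFactorSetUpMatSolverType(pc);
$     PCFactorGetMatrix(pc,&F);
$     MatMumpsSetIcntl(F,14,30);
 Here PCFactorSetUpMatSolverType() creates the factored matrix F so that MUMPS parameters can be set on it before the factorization is performed.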
 Using MUMPS with 64-bit integers
 MUMPS provides 64-bit integer support in two build modes:

 full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
 requires all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS) to be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In the compressed sparse row (CSR) storage of matrices,
 MUMPS stores column indices in 32 bits but row offsets in 64 bits, so you can have a huge number of nonzeros but must have fewer than 2^31 rows and
 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version, and it only requires a regular (32-bit
 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

 Two modes to run MUMPS/PETSc with OpenMP
$    Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$    threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$    -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16".

 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS) while still running the non-MUMPS part
 (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
 (or --with-hwloc) and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
 libraries, to really get performance you should also have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
 (PETSc will automatically try to use a threaded BLAS if --with-openmp is provided).

 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
 processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to the slaves (which might have been
 set by CPU binding policies in job scripts) and make those CPUs available to the master, so that the OpenMP threads spawned by MUMPS can run on them.
 On a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the example above, suppose a compute node has two sockets. If you interleave
 MPI ranks over the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind MPI ranks to cores, then with -mat_mumps_use_omp_threads 16
 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half of the cores on socket 1, which clearly hurts locality. If instead
 you map MPI ranks consecutively onto the two sockets, the problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on
 your MPI rank mapping and CPU binding. For example, with the Slurm job scheduler one can use srun --cpu-bind=verbose -m block:block to map consecutive
 MPI ranks to sockets and examine the mapping result.

 PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
 calls omp_set_num_threads(m) internally before calling MUMPS.

 References:
+  1.
- Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2987 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2988 2989 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2990 2991 M*/ 2992 2993 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2994 { 2995 PetscFunctionBegin; 2996 *type = MATSOLVERMUMPS; 2997 PetscFunctionReturn(0); 2998 } 2999 3000 /* MatGetFactor for Seq and MPI AIJ matrices */ 3001 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 3002 { 3003 Mat B; 3004 PetscErrorCode ierr; 3005 Mat_MUMPS *mumps; 3006 PetscBool isSeqAIJ; 3007 PetscMPIInt size; 3008 3009 PetscFunctionBegin; 3010 #if defined(PETSC_USE_COMPLEX) 3011 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3012 #endif 3013 /* Create the factorization matrix */ 3014 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 3015 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3016 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3017 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3018 ierr = MatSetUp(B);CHKERRQ(ierr); 3019 3020 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3021 3022 B->ops->view = MatView_MUMPS; 3023 B->ops->getinfo = MatGetInfo_MUMPS; 3024 3025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3032 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3033 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3038 3039 if (ftype == MAT_FACTOR_LU) { 3040 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3041 B->factortype = 
MAT_FACTOR_LU; 3042 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3043 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3044 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3045 mumps->sym = 0; 3046 } else { 3047 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3048 B->factortype = MAT_FACTOR_CHOLESKY; 3049 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3050 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3051 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr); 3052 #if defined(PETSC_USE_COMPLEX) 3053 mumps->sym = 2; 3054 #else 3055 if (A->spd_set && A->spd) mumps->sym = 1; 3056 else mumps->sym = 2; 3057 #endif 3058 } 3059 3060 /* set solvertype */ 3061 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3062 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3063 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3064 if (size == 1) { 3065 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3066 B->canuseordering = PETSC_TRUE; 3067 } 3068 B->ops->destroy = MatDestroy_MUMPS; 3069 B->data = (void*)mumps; 3070 3071 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3072 3073 *F = B; 3074 PetscFunctionReturn(0); 3075 } 3076 3077 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3078 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3079 { 3080 Mat B; 3081 PetscErrorCode ierr; 3082 Mat_MUMPS *mumps; 3083 PetscBool isSeqSBAIJ; 3084 PetscMPIInt size; 3085 3086 PetscFunctionBegin; 3087 #if defined(PETSC_USE_COMPLEX) 3088 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3089 #endif 3090 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3091 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3092 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3093 ierr = MatSetUp(B);CHKERRQ(ierr); 3094 3095 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3096 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 3097 if (isSeqSBAIJ) { 3098 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3099 } else { 3100 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3101 } 3102 3103 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3104 B->ops->view = MatView_MUMPS; 3105 B->ops->getinfo = MatGetInfo_MUMPS; 3106 3107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3114 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3117 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3120 3121 B->factortype = MAT_FACTOR_CHOLESKY; 3122 #if defined(PETSC_USE_COMPLEX) 3123 mumps->sym = 2; 3124 #else 3125 if (A->spd_set && A->spd) mumps->sym = 1; 3126 else mumps->sym = 2; 3127 #endif 3128 3129 /* set solvertype */ 3130 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3131 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3132 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3133 if (size == 1) { 3134 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3135 B->canuseordering = PETSC_TRUE; 3136 } 3137 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr); 3138 B->ops->destroy = MatDestroy_MUMPS; 3139 B->data = (void*)mumps; 3140 3141 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3142 3143 *F = B; 3144 PetscFunctionReturn(0); 3145 } 3146 3147 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3148 { 3149 Mat B; 3150 PetscErrorCode ierr; 3151 Mat_MUMPS *mumps; 3152 PetscBool isSeqBAIJ; 3153 PetscMPIInt size; 3154 3155 PetscFunctionBegin; 3156 /* Create the factorization matrix */ 3157 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 3158 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3159 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3160 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3161 ierr = MatSetUp(B);CHKERRQ(ierr); 3162 3163 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3164 if (ftype == MAT_FACTOR_LU) { 3165 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3166 B->factortype = MAT_FACTOR_LU; 3167 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3168 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3169 mumps->sym = 0; 3170 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3171 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 3172 3173 B->ops->view = MatView_MUMPS; 3174 B->ops->getinfo = MatGetInfo_MUMPS; 3175 3176 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3180 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3186 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3187 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3188 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3189 3190 /* set solvertype */ 3191 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3192 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3193 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3194 if (size == 1) { 3195 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3196 B->canuseordering = PETSC_TRUE; 3197 } 3198 B->ops->destroy = MatDestroy_MUMPS; 3199 B->data = (void*)mumps; 3200 3201 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3202 3203 *F = B; 3204 PetscFunctionReturn(0); 3205 } 3206 3207 /* MatGetFactor for Seq and MPI SELL matrices */ 3208 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3209 { 3210 Mat B; 3211 PetscErrorCode ierr; 3212 Mat_MUMPS *mumps; 3213 PetscBool isSeqSELL; 3214 PetscMPIInt size; 3215 3216 PetscFunctionBegin; 3217 /* Create the factorization matrix */ 3218 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3219 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3220 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3221 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3222 ierr = MatSetUp(B);CHKERRQ(ierr); 3223 3224 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3225 3226 B->ops->view = MatView_MUMPS; 3227 B->ops->getinfo = MatGetInfo_MUMPS; 3228 3229 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3230 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3231 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3232 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3233 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3234 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3235 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3236 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3237 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3238 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3239 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3240 3241 if (ftype == MAT_FACTOR_LU) { 3242 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3243 B->factortype = MAT_FACTOR_LU; 3244 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3245 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3246 mumps->sym = 0; 3247 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3248 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3249 3250 /* set solvertype */ 3251 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3252 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3253 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3254 if (size == 1) { 3255 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3256 B->canuseordering = PETSC_TRUE; 3257 } 3258 B->ops->destroy = MatDestroy_MUMPS; 3259 B->data = (void*)mumps; 3260 3261 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3262 3263 *F = B; 3264 PetscFunctionReturn(0); 3265 } 3266 3267 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3268 { 3269 PetscErrorCode ierr; 3270 3271 PetscFunctionBegin; 3272 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3273 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3274 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3275 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3276 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3277 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3278 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3279 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3280 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3281 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3282 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3283 PetscFunctionReturn(0); 3284 } 3285 3286
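
/*
   Usage sketch (illustrative only): factoring and solving with MATSOLVERMUMPS directly through the
   MatGetFactor() interface registered above, without going through KSP. This is a minimal sketch and
   not part of the compiled code: it assumes a sequential AIJ matrix A and assembled vectors b, x,
   omits error checking (ierr/CHKERRQ), and the ICNTL value chosen is only an example.

     Mat           F;
     IS            rowperm,colperm;
     MatFactorInfo info;
     PetscInt      ival;

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatMumpsSetIcntl(F,24,1);                             ICNTL(24)=1 turns on null-pivot detection
     MatFactorInfoInitialize(&info);
     MatGetOrdering(A,MATORDERINGND,&rowperm,&colperm);    placeholder ordering; MUMPS computes its own
     MatLUFactorSymbolic(F,A,rowperm,colperm,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);
     MatMumpsGetInfog(F,1,&ival);                          INFOG(1) < 0 indicates a MUMPS error
     ISDestroy(&rowperm);
     ISDestroy(&colperm);
     MatDestroy(&F);
*/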