1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if defined(INTSIZE64) /* INTSIZE64 is a macro one used to build MUMPS in full 64-bit mode */ 53 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 54 #else 55 #define MPIU_MUMPSINT MPI_INT 56 #define PETSC_MUMPS_INT_MAX 2147483647 57 #define PETSC_MUMPS_INT_MIN -2147483648 58 #endif 59 60 /* Cast PetscInt to PetscMUMPSInt. 
   Usually there is no overflow since <a> is row/col indices or some small integers. */
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* Range-check only in debug builds with 64-bit PetscInt; with 32-bit PetscInt the cast cannot truncate */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */

/* Read a PetscMUMPSInt value from the options database.
   The value is read via PetscOptionsInt_Private() as a PetscInt (always at least as wide as
   PetscMUMPSInt) and then narrowed with PetscMUMPSIntCast(). <set>, if non-NULL, reports
   whether the option was actually found; <currentvalue> is the default shown to the user;
   <lb>/<ub> bound the accepted range. */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;

  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
/* Convenience wrapper for use inside PetscOptionsBegin/End blocks, where PetscOptionsObject is in scope */
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
/* PetscMUMPS_c(mumps): run MUMPS on the omp master ranks only, then broadcast the result
   statistics to the other ranks of each omp_comm. Relies on a PetscErrorCode <ierr>
   being in scope at every call site. */
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
/* Without PETSc OpenMP support every rank calls MUMPS directly */
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar: the MUMPS scalar type matching this build's PetscScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t.
   indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data for a MUMPS factored matrix (stored in Mat->data) */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  /* the MUMPS control structure for this build's scalar type */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;          /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;    /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                /* 0: unsymmetric, 1: SPD, 2: general symmetric (MUMPS SYM parameter) -- TODO confirm against factorization setup */
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;        /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); /* type-specific (i,j,v) conversion routine */

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
   With 64-bit PetscInt the data is copied (with range checks) into mumps->ia_alloc/ja_alloc,
   which are grown on demand and reused across calls; with 32-bit PetscInt the input arrays
   are aliased directly (no copy). */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release all Schur-complement related state (index list, reduced rhs, work solution)
   and tell MUMPS to stop returning a Schur complement (ICNTL(19)=0) */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location.
   The reduced system is solved with the Schur complement S held by the factor F:
   if S is factored we use MatMatSolve(Transpose); if S is explicitly inverted we
   multiply by it instead. ICNTL(9)==0 selects the transposed solve, mirroring MUMPS. */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps the MUMPS reduced rhs storage; no copy is made */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs storage as B, so the solve writes the result in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(Schur); multiply into a separate work buffer, then copy back into redrhs */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Drive the two halves of a MUMPS solve with a Schur complement:
   expansion == PETSC_FALSE: set up the condensation step (allocate redrhs, ICNTL(26)=1)
                             before the main MUMPS solve call;
   expansion == PETSC_TRUE:  solve the reduced (Schur) system ourselves, then run the
                             MUMPS expansion step (ICNTL(26)=2) and restore defaults.
   A no-op when no Schur complement was requested (ICNTL(19)==0). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

*/

/* SeqAIJ -> full (i,j,v) triples. val aliases the AIJ data array directly (no copy);
   irn/jcn are built only on MAT_INITIAL_MATRIX. */
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  mumps->val = (PetscScalar*)av; /* MUMPS only reads the values, so the const cast is safe here */
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SeqSELL -> full (i,j,v) triples. Row indices are recovered from the sliced-ELL layout:
   slice i holds rows 8*i..8*i+7 round-robin, hence the (r+1)&0x07 cycling below. */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqBAIJ -> full (i,j,v) triples: each bs x bs block is expanded into scalar entries */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt
*ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs; /* number of block rows */
  mumps->val = aa->a;   /* BAIJ values are already stored column-major per block, usable in place */
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i; aj = aa->j;
    nz = bs2*aa->nz;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        /* expand block (i, ajj[k]): iterate in the same order the block values are stored */
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqSBAIJ -> upper-triangular (i,j,v) triples for the MUMPS symmetric (Cholesky) interface.
   For bs>1 the block-upper-triangular storage still contains strictly-lower scalar entries of
   diagonal blocks, so values must be compacted into val_alloc; for bs==1 the SBAIJ array is
   already exactly the upper triangle and is used in place. */
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt        bs;
  PetscInt64      nz,rnz,i,j,k,m;
  PetscErrorCode  ierr;
  PetscMUMPSInt   *row,*col;
  PetscScalar     *val;
  Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool       hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ai   = aa->i;
  aj   = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
    if (bs>1) {
      ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a; /* array may have been swapped by the matrix since last call */
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            /* keep a scalar entry when it is in an off-diagonal block (ajj[j]>i) or in the
               upper triangle (k>=m) of the diagonal block */
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
        nz++;
      }
    }
    if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}

/* SeqAIJ -> upper-triangular (i,j,v) triples for the MUMPS symmetric (Cholesky) interface.
   Only entries with column >= row are kept; values are copied into val_alloc.
   Rows with a missing diagonal (adiag[i] >= ai[i+1]) must be scanned entry by entry. */
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  ierr  =
MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          /* row without a diagonal entry: count upper-triangular entries one by one */
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    /* second pass: fill indices and values */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    /* MAT_REUSE_MATRIX: indices are unchanged, only refresh the values (same traversal order) */
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i <M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPISBAIJ -> distributed upper-triangular (i,j,v) triples, using global indices.
   The diagonal block (mat->A, SBAIJ) and off-diagonal block (mat->B, BAIJ, columns mapped
   through garray) are interleaved row by row. The allocation is a conservative upper bound;
   the exact mumps->nnz is only known after the fill loop. */
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode    ierr;
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    /* can not decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            /* keep entries in off-diagonal blocks or the upper triangle of a diagonal block */
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part: columns are mapped to global indices through garray */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  mumps->nnz = jj; /* the actual number of kept entries, <= the conservative allocation */
  PetscFunctionReturn(0);
}

/* MPIAIJ -> distributed full (i,j,v) triples with global indices; the local diagonal (Ad)
   and off-diagonal (Ao, columns mapped through garray) blocks are interleaved row by row */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPIBAIJ -> distributed full (i,j,v) triples: each bs x bs block of the diagonal (mat->A)
   and off-diagonal (mat->B) parts is expanded into scalar entries with global indices */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray =
mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part: expand each block in storage order */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: off-process columns mapped to global block indices through garray */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

/* MPIAIJ -> distributed upper-triangular (i,j,v) triples for the MUMPS symmetric (Cholesky)
   interface. The diagonal part contributes entries from adiag[i] on (column >= row); the
   off-diagonal part contributes only entries whose global column exceeds this rank's rstart.
   NOTE(review): the B-part filter uses garray[...] > rstart, which looks like it is meant to
   keep global columns beyond this row block -- confirm intent against the A-part criterion. */
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  adiag = aa->diag;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart +
ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: keep only entries matching the upper-triangular filter used when counting */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy a MUMPS factor: free all PETSc-side buffers and scatters, terminate the MUMPS
   instance (JOB_END), tear down optional OpenMP support, and detach composed functions */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  /* let MUMPS release its internal data for this instance */
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr =
PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solves A x = b (or A^T x = b, per ICNTL(9)) with the MUMPS factor stored in A.
   In parallel, the rhs is gathered to the host (MUMPS requires a centralized rhs)
   and the distributed solution is scattered back into x. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
  PetscScalar *array;
  Vec b_seq;
  IS is_iden,is_petsc;
  PetscErrorCode ierr;
  PetscInt i;
  PetscBool second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* Factorization previously failed: flag the solution as invalid instead of erroring */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs = 1;
  b_seq = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* petsc_size == 1 */
    /* Uniprocessor: MUMPS solves in place, so copy b into x and hand x's array to MUMPS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of
id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  /* RINFO(3) is MUMPS' operation count for the solve; the factor 2 accounts for fwd+bwd sweeps */
  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solves A^T x = b by toggling MUMPS' ICNTL(9) (0 = transpose) around a regular MatSolve. */
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1; /* restore the default (solve with A) */
  PetscFunctionReturn(0);
}

/* Solves A X = B for multiple right-hand sides. B is either MATDENSE, or a MATTRANSPOSEMAT
   wrapping a sparse matrix (MUMPS wants sparse rhs in compressed-column form); X must be dense. */
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat Bt = NULL;
  PetscBool denseX,denseB,flg,flgT;
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
  PetscInt i,nrhs,M;
  PetscScalar *array;
  const PetscScalar *rbray;
  PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt *isol_loc,*isol_loc_save;
  PetscScalar *bray,*sol_loc,*sol_loc_save;
  IS is_to,is_from;
  PetscInt k,proc,j,m,myrstart;
  const PetscInt *rstart;
  Vec v_mpi,b_seq,msol_loc;
  VecScatter scat_rhs,scat_sol;
  PetscScalar *aa;
  PetscInt spnr,*ia,*ja;
  Mat_MPIAIJ *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actural RHS matrix,
         because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs = NULL;

  if (mumps->petsc_size == 1) {
    /* NOTE(review): these locals shadow the outer aa/spnr/ia/ja declared at function scope;
       the outer ones are only used on the parallel path below */
    PetscScalar *aa;
    PetscInt spnr,*ia,*ja;
    PetscBool second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) { /* dense B */
    /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
       very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
       0, re-arrange B into desired order, which is a local operation.
     */

    /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      /* non-host ranks contribute nothing to the centralized rhs */
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs = 0;
      mumps->id.rhs_sparse = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find which process owns global row isol_loc[i] */
    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */
        iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */
        m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; /* restore MatSolve()'s buffers */
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr =
PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1349 { 1350 PetscErrorCode ierr; 1351 PetscBool flg; 1352 Mat B; 1353 1354 PetscFunctionBegin; 1355 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1356 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1357 1358 /* Create B=Bt^T that uses Bt's data structure */ 1359 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1360 1361 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1362 ierr = MatDestroy(&B);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #if !defined(PETSC_USE_COMPLEX) 1367 /* 1368 input: 1369 F: numeric factor 1370 output: 1371 nneg: total number of negative pivots 1372 nzero: total number of zero pivots 1373 npos: (global dimension of F) - nneg - nzero 1374 */ 1375 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1376 { 1377 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1378 PetscErrorCode ierr; 1379 PetscMPIInt size; 1380 1381 PetscFunctionBegin; 1382 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1383 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1384 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1385 1386 if (nneg) *nneg = mumps->id.INFOG(12); 1387 if (nzero || npos) { 1388 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1389 if (nzero) *nzero = mumps->id.INFOG(28); 1390 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 #endif 1395 1396 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1397 { 1398 PetscErrorCode ierr; 1399 PetscInt i,nreqs; 1400 PetscMUMPSInt *irn,*jcn; 1401 PetscMPIInt count; 1402 PetscInt64 totnnz,remain; 1403 const PetscInt osize=mumps->omp_comm_size; 1404 PetscScalar *val; 1405 1406 PetscFunctionBegin; 1407 if (osize > 1) { 1408 if (reuse == MAT_INITIAL_MATRIX) { 1409 /* master first gathers counts of nonzeros to receive */ 1410 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1411 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr); 1412 1413 /* Then each computes number of send/recvs */ 1414 if (mumps->is_omp_master) { 1415 /* Start from 1 since self communication is not done in MPI */ 1416 nreqs = 0; 1417 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1418 } else { 1419 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1420 } 1421 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1422 1423 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1424 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1425 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size 1426 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1427 */ 1428 nreqs = 0; /* counter for actual send/recvs */ 1429 if (mumps->is_omp_master) { 1430 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1431 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1432 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1433 1434 /* Self communication */ 1435 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1436 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1437 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1438 1439 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1440 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1441 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1442 mumps->nnz = totnnz; 1443 mumps->irn = irn; 1444 mumps->jcn = jcn; 1445 mumps->val = mumps->val_alloc = val; 1446 1447 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1448 jcn += mumps->recvcount[0]; 1449 val += mumps->recvcount[0]; 1450 1451 /* Remote communication */ 1452 for (i=1; i<osize; i++) { 1453 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1454 remain = mumps->recvcount[i] - count; 1455 while (count>0) { 1456 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1457 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1458 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1459 irn += count; 1460 jcn += count; 1461 val += count; 1462 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1463 remain -= count; 1464 } 1465 } 1466 } else { 1467 irn = mumps->irn; 1468 jcn = mumps->jcn; 1469 val = mumps->val; 1470 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1471 remain = mumps->nnz - count; 
        while (count>0) {
          ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          irn += count;
          jcn += count;
          val += count;
          count = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: pattern already gathered; only the numerical values move */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            val += count;
            count = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val = mumps->val;
        count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          val += count;
          count = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(0);
}

/* Numerical factorization: refreshes the COO values from A, gathers them onto the omp master
   (if PETSc OpenMP support is in use) and runs MUMPS JOB_FACTNUMERIC, translating MUMPS error
   codes into PETSc factorerrortype (unless A->erroriffailure requests a hard error). */
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool isMPIAIJ;

  PetscFunctionBegin;
  /* NOTE(review): the conjunction (INFOG(1) == -16 && INFOG(1) == 0) is always false, so this
     guard reduces to INFOG(1) < 0; the second operand was presumably meant to test INFO(2) — confirm
     against upstream before changing */
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      /* Soft failure: record the error class on F and let the caller inspect it */
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  /* NOTE(review): the condition tests ICNTL(16) but the message prints INFOG(16) — likely one of
     the two is a typo; confirm intended field before changing */
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16));

  F->assembled = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt lsol_loc;
    PetscScalar *sol_loc;

    /* NOTE(review): isMPIAIJ is set here but its value is not used in the visible code */
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; Create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr =
PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* Sets MUMPS options from the options database */ 1605 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1606 { 1607 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1608 PetscErrorCode ierr; 1609 PetscMUMPSInt icntl=0; 1610 PetscInt info[80],i,ninfo=80; 1611 PetscBool flg=PETSC_FALSE; 1612 1613 PetscFunctionBegin; 1614 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1615 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1616 if (flg) mumps->id.ICNTL(1) = icntl; 1617 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1618 if (flg) mumps->id.ICNTL(2) = icntl; 1619 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1620 if (flg) mumps->id.ICNTL(3) = icntl; 1621 1622 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1623 if (flg) mumps->id.ICNTL(4) = icntl; 1624 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1625 1626 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1627 if (flg) mumps->id.ICNTL(6) = icntl; 1628 1629 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 
3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1630 if (flg) { 1631 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1632 else mumps->id.ICNTL(7) = icntl; 1633 } 1634 1635 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1636 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1637 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1638 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1639 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1640 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1641 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1642 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1643 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1644 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1645 ierr = 
MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1646 } 1647 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1648 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1649 1650 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1651 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1652 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1653 if (mumps->id.ICNTL(24)) { 1654 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1655 } 1656 1657 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1658 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1659 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1660 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis 
and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1661 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1662 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1663 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1664 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1665 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1666 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1667 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1668 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1669 1670 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1671 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1672 ierr = 
PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1673 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1674 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1675 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1676 1677 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1678 1679 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1680 if (ninfo) { 1681 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1682 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1683 mumps->ninfo = ninfo; 1684 for (i=0; i<ninfo; i++) { 1685 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1686 else mumps->info[i] = info[i]; 1687 } 1688 } 1689 1690 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1695 { 1696 PetscErrorCode ierr; 1697 PetscInt nthreads=0; 1698 1699 PetscFunctionBegin; 1700 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1701 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1702 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1703 1704 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1705 if 
(mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1706 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1707 if (mumps->use_petsc_omp_support) { 1708 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1709 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1710 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1711 #else 1712 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1713 #endif 1714 } else { 1715 mumps->omp_comm = PETSC_COMM_SELF; 1716 mumps->mumps_comm = mumps->petsc_comm; 1717 mumps->is_omp_master = PETSC_TRUE; 1718 } 1719 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1720 mumps->reqs = NULL; 1721 mumps->tag = 0; 1722 1723 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1724 mumps->id.job = JOB_INIT; 1725 mumps->id.par = 1; /* host participates factorizaton and solve */ 1726 mumps->id.sym = mumps->sym; 1727 1728 PetscMUMPS_c(mumps); 1729 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1730 1731 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1732 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1733 */ 1734 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1735 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1736 1737 mumps->scat_rhs = NULL; 1738 mumps->scat_sol = NULL; 1739 1740 /* set PETSc-MUMPS default options - override MUMPS default */ 1741 mumps->id.ICNTL(3) = 0; 1742 mumps->id.ICNTL(4) = 0; 1743 if (mumps->petsc_size == 1) { 1744 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1745 } else { 1746 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1747 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1748 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1749 } 1750 1751 /* schur */ 1752 mumps->id.size_schur = 0; 1753 mumps->id.listvar_schur = NULL; 1754 mumps->id.schur = NULL; 1755 mumps->sizeredrhs = 0; 1756 mumps->schur_sol = NULL; 1757 mumps->schur_sizesol = 0; 1758 PetscFunctionReturn(0); 1759 } 1760 1761 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1762 { 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 if (mumps->id.INFOG(1) < 0) { 1767 if (A->erroriffailure) { 1768 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1769 } else { 1770 if (mumps->id.INFOG(1) == -6) { 1771 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1772 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1773 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1774 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1775 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1776 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1777 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1778 } else { 1779 ierr = 
PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1780 F->factorerrortype = MAT_FACTOR_OTHER; 1781 } 1782 } 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786 1787 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1788 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1789 { 1790 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1791 PetscErrorCode ierr; 1792 Vec b; 1793 const PetscInt M = A->rmap->N; 1794 1795 PetscFunctionBegin; 1796 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1797 1798 /* Set MUMPS options from the options database */ 1799 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1800 1801 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1802 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1803 1804 /* analysis phase */ 1805 /*----------------*/ 1806 mumps->id.job = JOB_FACTSYMBOLIC; 1807 mumps->id.n = M; 1808 switch (mumps->id.ICNTL(18)) { 1809 case 0: /* centralized assembled matrix input */ 1810 if (!mumps->myid) { 1811 mumps->id.nnz = mumps->nnz; 1812 mumps->id.irn = mumps->irn; 1813 mumps->id.jcn = mumps->jcn; 1814 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1815 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1816 /* 1817 PetscBool flag; 1818 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1819 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1820 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1821 */ 1822 if (!mumps->myid) { 1823 const PetscInt *idx; 1824 PetscInt i; 1825 1826 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1827 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1828 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: 
start from 1, not 0! */ 1829 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1830 } 1831 } 1832 } 1833 break; 1834 case 3: /* distributed assembled matrix input (size>1) */ 1835 mumps->id.nnz_loc = mumps->nnz; 1836 mumps->id.irn_loc = mumps->irn; 1837 mumps->id.jcn_loc = mumps->jcn; 1838 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1839 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1840 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1841 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1842 ierr = VecDestroy(&b);CHKERRQ(ierr); 1843 break; 1844 } 1845 PetscMUMPS_c(mumps); 1846 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1847 1848 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1849 F->ops->solve = MatSolve_MUMPS; 1850 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1851 F->ops->matsolve = MatMatSolve_MUMPS; 1852 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 /* Note the Petsc r and c permutations are ignored */ 1857 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1858 { 1859 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1860 PetscErrorCode ierr; 1861 Vec b; 1862 const PetscInt M = A->rmap->N; 1863 1864 PetscFunctionBegin; 1865 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1866 1867 /* Set MUMPS options from the options database */ 1868 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1869 1870 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1871 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1872 1873 /* analysis phase */ 1874 /*----------------*/ 1875 mumps->id.job = JOB_FACTSYMBOLIC; 1876 mumps->id.n = M; 1877 switch (mumps->id.ICNTL(18)) { 1878 case 0: /* centralized assembled matrix input */ 1879 if (!mumps->myid) { 1880 mumps->id.nnz = mumps->nnz; 1881 
mumps->id.irn = mumps->irn; 1882 mumps->id.jcn = mumps->jcn; 1883 if (mumps->id.ICNTL(6)>1) { 1884 mumps->id.a = (MumpsScalar*)mumps->val; 1885 } 1886 } 1887 break; 1888 case 3: /* distributed assembled matrix input (size>1) */ 1889 mumps->id.nnz_loc = mumps->nnz; 1890 mumps->id.irn_loc = mumps->irn; 1891 mumps->id.jcn_loc = mumps->jcn; 1892 if (mumps->id.ICNTL(6)>1) { 1893 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1894 } 1895 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1896 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1897 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1898 ierr = VecDestroy(&b);CHKERRQ(ierr); 1899 break; 1900 } 1901 PetscMUMPS_c(mumps); 1902 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1903 1904 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1905 F->ops->solve = MatSolve_MUMPS; 1906 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1907 PetscFunctionReturn(0); 1908 } 1909 1910 /* Note the Petsc r permutation and factor info are ignored */ 1911 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1912 { 1913 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1914 PetscErrorCode ierr; 1915 Vec b; 1916 const PetscInt M = A->rmap->N; 1917 1918 PetscFunctionBegin; 1919 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1920 1921 /* Set MUMPS options from the options database */ 1922 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1923 1924 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1925 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1926 1927 /* analysis phase */ 1928 /*----------------*/ 1929 mumps->id.job = JOB_FACTSYMBOLIC; 1930 mumps->id.n = M; 1931 switch (mumps->id.ICNTL(18)) { 1932 case 0: /* centralized assembled matrix input */ 1933 if (!mumps->myid) { 1934 mumps->id.nnz = mumps->nnz; 1935 mumps->id.irn = mumps->irn; 1936 
mumps->id.jcn = mumps->jcn; 1937 if (mumps->id.ICNTL(6)>1) { 1938 mumps->id.a = (MumpsScalar*)mumps->val; 1939 } 1940 } 1941 break; 1942 case 3: /* distributed assembled matrix input (size>1) */ 1943 mumps->id.nnz_loc = mumps->nnz; 1944 mumps->id.irn_loc = mumps->irn; 1945 mumps->id.jcn_loc = mumps->jcn; 1946 if (mumps->id.ICNTL(6)>1) { 1947 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1948 } 1949 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1950 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1951 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1952 ierr = VecDestroy(&b);CHKERRQ(ierr); 1953 break; 1954 } 1955 PetscMUMPS_c(mumps); 1956 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1957 1958 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1959 F->ops->solve = MatSolve_MUMPS; 1960 F->ops->solvetranspose = MatSolve_MUMPS; 1961 F->ops->matsolve = MatMatSolve_MUMPS; 1962 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1963 #if defined(PETSC_USE_COMPLEX) 1964 F->ops->getinertia = NULL; 1965 #else 1966 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1967 #endif 1968 PetscFunctionReturn(0); 1969 } 1970 1971 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1972 { 1973 PetscErrorCode ierr; 1974 PetscBool iascii; 1975 PetscViewerFormat format; 1976 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1977 1978 PetscFunctionBegin; 1979 /* check if matrix is mumps type */ 1980 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1981 1982 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1983 if (iascii) { 1984 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1985 if (format == PETSC_VIEWER_ASCII_INFO) { 1986 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1987 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1988 ierr = 
PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1990 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1991 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1992 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1993 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1994 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1995 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1996 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1997 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1998 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1999 if (mumps->id.ICNTL(11)>0) { 2000 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2001 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2002 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2003 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2004 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2005 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2006 } 2007 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2008 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2009 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2010 /* ICNTL(15-17) not used */ 2011 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2012 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2013 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2014 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2015 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2016 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2017 2018 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2019 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2020 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2021 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2022 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2023 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2024 2025 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2026 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2027 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2028 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2029 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2030 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2031 2032 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2033 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2034 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2035 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2036 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2037 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2038 2039 /* infomation local to each processor */ 2040 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2041 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2042 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g 
\n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2043 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2044 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2045 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2046 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2047 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2048 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2049 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2050 2051 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2052 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2053 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2054 2055 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2056 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2057 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2058 2059 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2060 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2061 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2062 2063 if (mumps->ninfo && mumps->ninfo <= 80){ 2064 PetscInt i; 2065 for (i=0; i<mumps->ninfo; i++){ 2066 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2067 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 
2068 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2069 } 2070 } 2071 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2072 2073 if (!mumps->myid) { /* information from the host */ 2074 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2075 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2076 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2077 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2078 2079 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 2080 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 2081 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 2082 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 2083 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 2084 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 2085 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 2086 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 2087 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 2088 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 2089 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 2090 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 2091 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 2092 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 2093 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 2094 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 2095 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 2096 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 2097 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory 
consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 2098 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 2099 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 2100 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 2101 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 2102 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2103 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2104 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2105 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2106 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2107 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2108 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2109 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for 
running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* MatGetInfo_MUMPS - report MatInfo statistics for a MUMPS factored matrix.
   Only the factor-entry count is available from MUMPS (taken from INFOG(20),
   presumably the estimated number of entries in the factors - TODO confirm
   against the MUMPS manual); all other fields are reported as 0 (block size 1). */
PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* MatFactorSetSchurIS_MUMPS - select the index set of unknowns whose Schur complement
   MUMPS should compute during factorization. Allocates the dense Schur matrix F->schur
   (backing storage handed to MUMPS via id.schur), converts the IS indices to 1-based
   MUMPS indices in id.listvar_schur, and sets ICNTL(19) to the appropriate return format.
   In parallel, only an empty IS is allowed on non-root ranks (distributed Schur
   complements are not supported). */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  /* MUMPS writes the Schur complement directly into the dense matrix storage */
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* MatFactorCreateSchurComplement_MUMPS - copy the Schur complement computed by MUMPS
   (stored in id.schur, layout selected by ICNTL(19)) into a new SEQDENSE matrix *S,
   expanding triangular storage to a full symmetric matrix when needed. In complex
   builds the MumpsScalar (r,i) pairs are reassembled into PetscScalar values. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val; /* transpose row-major MUMPS data into column-major dense storage */
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          /* mirror across the diagonal to produce the full symmetric matrix */
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          /* mirror across the diagonal to produce the full symmetric matrix */
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* MatMumpsSetIcntl_MUMPS - plugin implementation dispatched by MatMumpsSetIcntl() */
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatMumpsGetIcntl_MUMPS - plugin implementation dispatched by MatMumpsGetIcntl() */
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References:
.
MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  /* PetscTryMethod: silently no-op if the factor is not a MUMPS factor */
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  /* PetscUseMethod: errors if the factor is not a MUMPS factor */
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* MatMumpsSetCntl_MUMPS - plugin implementation dispatched by MatMumpsSetCntl() */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}

/* MatMumpsGetCntl_MUMPS - plugin implementation dispatched by MatMumpsGetCntl() */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.   -mat_mumps_cntl_<icntl> <val>

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.
MUMPS Users' Guide 2411 2412 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2413 @*/ 2414 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2415 { 2416 PetscErrorCode ierr; 2417 2418 PetscFunctionBegin; 2419 PetscValidType(F,1); 2420 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2421 PetscValidLogicalCollectiveInt(F,icntl,2); 2422 PetscValidRealPointer(val,3); 2423 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2428 { 2429 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2430 2431 PetscFunctionBegin; 2432 *info = mumps->id.INFO(icntl); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2437 { 2438 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2439 2440 PetscFunctionBegin; 2441 *infog = mumps->id.INFOG(icntl); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2446 { 2447 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2448 2449 PetscFunctionBegin; 2450 *rinfo = mumps->id.RINFO(icntl); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2455 { 2456 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2457 2458 PetscFunctionBegin; 2459 *rinfog = mumps->id.RINFOG(icntl); 2460 PetscFunctionReturn(0); 2461 } 2462 2463 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2464 { 2465 PetscErrorCode ierr; 2466 Mat Bt = NULL,Btseq = NULL; 2467 PetscBool flg; 2468 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2469 PetscScalar *aa; 2470 PetscInt spnr,*ia,*ja,M,nrhs; 2471 2472 PetscFunctionBegin; 2473 
PetscValidIntPointer(spRHS,2); 2474 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2475 if (flg) { 2476 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2477 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2478 2479 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2480 2481 if (mumps->petsc_size > 1) { 2482 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2483 Btseq = b->A; 2484 } else { 2485 Btseq = Bt; 2486 } 2487 2488 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2489 mumps->id.nrhs = nrhs; 2490 mumps->id.lrhs = M; 2491 mumps->id.rhs = NULL; 2492 2493 if (!mumps->myid) { 2494 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2495 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2496 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2497 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2498 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2499 } else { 2500 mumps->id.irhs_ptr = NULL; 2501 mumps->id.irhs_sparse = NULL; 2502 mumps->id.nz_rhs = 0; 2503 mumps->id.rhs_sparse = NULL; 2504 } 2505 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2506 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2507 2508 /* solve phase */ 2509 /*-------------*/ 2510 mumps->id.job = JOB_SOLVE; 2511 PetscMUMPS_c(mumps); 2512 if (mumps->id.INFOG(1) < 0) 2513 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2514 2515 if (!mumps->myid) { 2516 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2517 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2518 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ 
structure"); 2519 } 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /*@ 2524 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2525 2526 Logically Collective on Mat 2527 2528 Input Parameters: 2529 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2530 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2531 2532 Output Parameter: 2533 . spRHS - requested entries of inverse of A 2534 2535 Level: beginner 2536 2537 References: 2538 . MUMPS Users' Guide 2539 2540 .seealso: MatGetFactor(), MatCreateTranspose() 2541 @*/ 2542 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2543 { 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 PetscValidType(F,1); 2548 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2549 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2554 { 2555 PetscErrorCode ierr; 2556 Mat spRHS; 2557 2558 PetscFunctionBegin; 2559 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2560 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2561 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 /*@ 2566 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2567 2568 Logically Collective on Mat 2569 2570 Input Parameters: 2571 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2572 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2573 2574 Output Parameter: 2575 . spRHST - requested entries of inverse of A^T 2576 2577 Level: beginner 2578 2579 References: 2580 . 
MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInfo - Get MUMPS parameter INFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFO()

  Output Parameter:
.  ival - value of MUMPS INFO(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInfog - Get MUMPS parameter INFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
.  ival - value of MUMPS INFOG(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetRinfo - Get MUMPS parameter RINFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
.  val - value of MUMPS RINFO(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.
2726 2727 Works with MATAIJ and MATSBAIJ matrices 2728 2729 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2730 2731 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2732 2733 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2734 2735 Options Database Keys: 2736 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2737 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2738 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2739 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2740 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2741 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2742 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2743 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2744 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2745 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2746 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2747 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2748 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2749 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2750 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2751 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2752 . 
-mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

   Level: beginner

   Notes:
   MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2775 2776 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2777 $ KSPGetPC(ksp,&pc); 2778 $ PCFactorGetMatrix(pc,&mat); 2779 $ MatMumpsGetInfo(mat,....); 2780 $ MatMumpsGetInfog(mat,....); etc. 2781 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2782 2783 Two modes to run MUMPS/PETSc with OpenMP 2784 2785 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2786 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2787 2788 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2789 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2790 2791 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2792 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2793 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2794 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2795 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2796 2797 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2798 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2799 size m and create a communicator called omp_comm for each group. 
Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2800 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2801 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2802 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2803 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2804 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2805 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2806 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2807 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2808 examine the mapping result. 2809 2810 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2811 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2812 calls omp_set_num_threads(m) internally before calling MUMPS. 2813 2814 References: 2815 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2816 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." 
Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.

.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()

M*/

/* MatFactorGetSolverType_mumps - report MATSOLVERMUMPS as the solver package name */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
/* Creates the MUMPS factor matrix B for A: sets up the Mat_MUMPS context, composes
   the MatMumps* plugin methods on B, selects the triple-conversion routine matching
   A's type and the factor type (LU vs Cholesky), and initializes the MUMPS instance. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    /* MUMPS SYM=1 (SPD) only when the user has declared A SPD; otherwise SYM=2 (general symmetric) */
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
/* Same as MatGetFactor_aij_mumps but SBAIJ input implies Cholesky only. */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  /* MUMPS SYM=1 (SPD) only when the user has declared A SPD; otherwise SYM=2 (general symmetric) */
  if (A->spd_set && A->spd) mumps->sym = 1;
  else                      mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr =
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2974 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2975 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2976 ierr = MatSetUp(B);CHKERRQ(ierr); 2977 2978 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2979 if (ftype == MAT_FACTOR_LU) { 2980 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2981 B->factortype = MAT_FACTOR_LU; 2982 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2983 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2984 mumps->sym = 0; 2985 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2986 2987 B->ops->view = MatView_MUMPS; 2988 B->ops->getinfo = MatGetInfo_MUMPS; 2989 2990 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2991 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2992 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2998 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2999 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3000 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3003 3004 /* set solvertype */ 3005 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3006 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3007 3008 B->ops->destroy = MatDestroy_MUMPS; 3009 B->data = (void*)mumps; 3010 3011 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3012 3013 *F = B; 3014 PetscFunctionReturn(0); 3015 } 3016 3017 /* MatGetFactor for Seq and MPI SELL matrices */ 3018 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3019 { 3020 Mat B; 3021 PetscErrorCode ierr; 3022 Mat_MUMPS *mumps; 3023 PetscBool isSeqSELL; 3024 3025 PetscFunctionBegin; 3026 /* Create the factorization matrix */ 3027 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3028 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3029 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3030 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3031 ierr = MatSetUp(B);CHKERRQ(ierr); 3032 3033 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3034 3035 B->ops->view = MatView_MUMPS; 3036 B->ops->getinfo = MatGetInfo_MUMPS; 3037 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3040 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3049 3050 if (ftype == MAT_FACTOR_LU) { 3051 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3052 B->factortype = MAT_FACTOR_LU; 3053 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3054 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3055 mumps->sym = 0; 3056 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3057 3058 /* set solvertype */ 3059 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3060 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3061 3062 B->ops->destroy = MatDestroy_MUMPS; 3063 B->data = (void*)mumps; 3064 3065 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3066 3067 *F = B; 3068 PetscFunctionReturn(0); 3069 } 3070 3071 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3072 { 3073 PetscErrorCode ierr; 3074 3075 PetscFunctionBegin; 3076 ierr = 
MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3077 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3078 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3079 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3080 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3081 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3082 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3083 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3084 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3085 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3086 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3087 PetscFunctionReturn(0); 3088 } 3089 3090