1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if defined(INTSIZE64) /* INTSIZE64 is a macro one used to build MUMPS in full 64-bit mode */ 53 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 54 #else 55 #define MPIU_MUMPSINT MPI_INT 56 #define PETSC_MUMPS_INT_MAX 2147483647 57 #define PETSC_MUMPS_INT_MIN -2147483648 58 #endif 59 60 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 61 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b) 62 { 63 PetscFunctionBegin; 64 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_64BIT_INDICES) 65 if (a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 66 #endif 67 *b = (PetscMUMPSInt)(a); 68 PetscFunctionReturn(0); 69 } 70 71 /* Put these utility routines here since they are only used in this file */ 72 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub) 73 { 74 PetscErrorCode ierr; 75 PetscInt myval; 76 PetscBool myset; 77 PetscFunctionBegin; 78 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 79 ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr); 80 if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);} 81 if (set) *set = myset; 82 PetscFunctionReturn(0); 83 } 84 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX) 85 86 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 87 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 88 #define PetscMUMPS_c(mumps) \ 89 do { \ 90 if (mumps->use_petsc_omp_support) { \ 91 if (mumps->is_omp_master) { \ 92 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 93 MUMPS_c(&mumps->id); \ 94 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 95 } \ 96 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 97 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 98 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 99 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 100 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 101 */ \ 102 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 103 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRQ(ierr); \ 104 ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 105 } else { \ 106 MUMPS_c(&mumps->id); \ 107 } \ 108 } while(0) 109 #else 110 #define PetscMUMPS_c(mumps) \ 111 do { MUMPS_c(&mumps->id); } while (0) 112 #endif 113 114 /* declare MumpsScalar */ 115 #if defined(PETSC_USE_COMPLEX) 116 #if defined(PETSC_USE_REAL_SINGLE) 117 #define MumpsScalar mumps_complex 118 #else 119 #define MumpsScalar mumps_double_complex 120 #endif 121 #else 122 #define MumpsScalar PetscScalar 123 #endif 124 125 /* macros s.t. indices match MUMPS documentation */ 126 #define ICNTL(I) icntl[(I)-1] 127 #define CNTL(I) cntl[(I)-1] 128 #define INFOG(I) infog[(I)-1] 129 #define INFO(I) info[(I)-1] 130 #define RINFOG(I) rinfog[(I)-1] 131 #define RINFO(I) rinfo[(I)-1] 132 133 typedef struct Mat_MUMPS Mat_MUMPS; 134 struct Mat_MUMPS { 135 #if defined(PETSC_USE_COMPLEX) 136 #if defined(PETSC_USE_REAL_SINGLE) 137 CMUMPS_STRUC_C id; 138 #else 139 ZMUMPS_STRUC_C id; 140 #endif 141 #else 142 #if defined(PETSC_USE_REAL_SINGLE) 143 SMUMPS_STRUC_C id; 144 #else 145 DMUMPS_STRUC_C id; 146 #endif 147 #endif 148 149 MatStructure matstruc; 150 PetscMPIInt myid,petsc_size; 151 PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */ 152 PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 153 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 154 PetscMUMPSInt sym; 155 MPI_Comm mumps_comm; 156 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 157 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 158 Vec b_seq,x_seq; 159 PetscInt ninfo,*info; /* which INFO to display */ 160 PetscInt sizeredrhs; 161 PetscScalar *schur_sol; 162 PetscInt schur_sizesol; 163 PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 164 PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 165 PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); 166 167 /* stuff used by petsc/mumps OpenMP support*/ 168 PetscBool use_petsc_omp_support; 169 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 170 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 171 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 172 PetscMPIInt tag,omp_comm_size; 173 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 174 MPI_Request *reqs; 175 }; 176 177 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 178 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 179 */ 180 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps) 181 { 182 PetscErrorCode ierr; 183 PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 184 185 PetscFunctionBegin; 186 #if defined(PETSC_USE_64BIT_INDICES) 187 { 188 PetscInt i; 189 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 190 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 191 ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr); 192 mumps->cur_ilen = nrow+1; 193 } 194 if (nnz > mumps->cur_jlen) { 195 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 196 ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr); 197 mumps->cur_jlen = nnz; 198 } 199 for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);} 200 for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);} 201 *ia_mumps = mumps->ia_alloc; 202 *ja_mumps = mumps->ja_alloc; 203 } 204 #else 205 *ia_mumps = ia; 206 *ja_mumps = ja; 207 #endif 208 ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 213 { 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 218 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 219 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 220 mumps->id.size_schur = 0; 221 mumps->id.schur_lld = 0; 222 mumps->id.ICNTL(19) = 0; 223 PetscFunctionReturn(0); 224 } 225 226 /* solve with rhs in mumps->id.redrhs and return in the same location */ 227 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 228 { 229 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 230 Mat S,B,X; 231 MatFactorSchurStatus schurstatus; 232 PetscInt sizesol; 233 PetscErrorCode ierr; 234 235 PetscFunctionBegin; 236 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 237 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 238 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 239 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 240 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 241 ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr); 242 #endif 243 switch (schurstatus) { 244 case MAT_FACTOR_SCHUR_FACTORED: 245 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 246 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 247 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 248 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 249 #endif 250 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 251 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 252 } else { 253 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 254 } 255 break; 256 case MAT_FACTOR_SCHUR_INVERTED: 257 sizesol = mumps->id.nrhs*mumps->id.size_schur; 258 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 259 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 260 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 261 mumps->schur_sizesol = sizesol; 262 } 263 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 264 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 265 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 266 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 267 #endif 268 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 269 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 270 } else { 271 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 272 } 273 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 274 break; 275 default: 276 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 277 break; 278 } 279 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 280 ierr = MatDestroy(&B);CHKERRQ(ierr); 281 ierr = MatDestroy(&X);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 286 { 287 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 288 PetscErrorCode ierr; 289 290 PetscFunctionBegin; 291 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 292 PetscFunctionReturn(0); 293 } 294 if (!expansion) { /* prepare for the condensation step */ 295 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 296 /* allocate MUMPS internal array to store reduced right-hand sides */ 297 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 298 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 299 mumps->id.lredrhs = mumps->id.size_schur; 300 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 301 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 302 } 303 mumps->id.ICNTL(26) = 1; /* condensation phase */ 304 } else { /* prepare for the expansion step */ 305 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 306 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 307 mumps->id.ICNTL(26) = 2; /* expansion phase */ 308 PetscMUMPS_c(mumps); 309 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 310 /* restore defaults */ 311 mumps->id.ICNTL(26) = -1; 312 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 313 if (mumps->id.nrhs > 1) { 314 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 315 mumps->id.lredrhs = 0; 316 mumps->sizeredrhs = 0; 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 /* 323 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 324 325 input: 326 A - matrix in aij,baij or sbaij format 327 shift - 0: C style output triple; 1: Fortran style output triple. 328 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 329 MAT_REUSE_MATRIX: only the values in v array are updated 330 output: 331 nnz - dim of r, c, and v (number of local nonzero entries of A) 332 r, c, v - row and col index, matrix values (matrix triples) 333 334 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 335 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 336 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 337 338 */ 339 340 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 341 { 342 const PetscScalar *av; 343 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 344 PetscInt64 nz,rnz,i,j,k; 345 PetscErrorCode ierr; 346 PetscMUMPSInt *row,*col; 347 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 348 349 PetscFunctionBegin; 350 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 351 mumps->val = (PetscScalar*)av; 352 if (reuse == MAT_INITIAL_MATRIX) { 353 nz = aa->nz; 354 ai = aa->i; 355 aj = aa->j; 356 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 357 for (i=k=0; i<M; i++) { 358 rnz = ai[i+1] - ai[i]; 359 ajj = aj + ai[i]; 360 for (j=0; j<rnz; j++) { 361 ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr); 362 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr); 363 k++; 364 } 365 } 366 mumps->irn = row; 367 mumps->jcn = col; 368 mumps->nnz = nz; 369 } 370 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 374 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 375 { 376 PetscErrorCode ierr; 377 PetscInt64 nz,i,j,k,r; 378 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 379 PetscMUMPSInt *row,*col; 380 381 PetscFunctionBegin; 382 mumps->val = a->val; 383 if (reuse == MAT_INITIAL_MATRIX) { 384 nz = a->sliidx[a->totalslices]; 385 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 386 for (i=k=0; i<a->totalslices; i++) { 387 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 388 ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr); 389 } 390 } 391 for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);} 392 mumps->irn = row; 393 mumps->jcn = col; 394 mumps->nnz = nz; 395 } 396 PetscFunctionReturn(0); 397 } 398 399 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 400 { 401 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 402 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 403 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 404 PetscInt bs; 405 PetscErrorCode ierr; 406 PetscMUMPSInt *row,*col; 407 408 PetscFunctionBegin; 409 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 410 M = A->rmap->N/bs; 411 mumps->val = aa->a; 412 if (reuse == MAT_INITIAL_MATRIX) { 413 ai = aa->i; aj = aa->j; 414 nz = bs2*aa->nz; 415 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 416 for (i=0; i<M; i++) { 417 ajj = aj + ai[i]; 418 rnz = ai[i+1] - ai[i]; 419 for (k=0; k<rnz; k++) { 420 for (j=0; j<bs; j++) { 421 for (m=0; m<bs; m++) { 422 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 423 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 424 idx++; 425 } 426 } 427 } 428 } 429 mumps->irn = row; 430 mumps->jcn = col; 431 mumps->nnz = nz; 432 } 433 PetscFunctionReturn(0); 434 } 435 436 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 437 { 438 const PetscInt *ai, *aj,*ajj; 439 PetscInt bs; 440 PetscInt64 nz,rnz,i,j,k,m; 441 PetscErrorCode ierr; 442 PetscMUMPSInt *row,*col; 443 PetscScalar *val; 444 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 445 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 446 #if defined(PETSC_USE_COMPLEX) 447 PetscBool hermitian; 448 #endif 449 450 PetscFunctionBegin; 451 #if defined(PETSC_USE_COMPLEX) 452 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 453 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 454 #endif 455 ai = aa->i; 456 aj = aa->j; 457 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 458 if (reuse == MAT_INITIAL_MATRIX) { 459 nz = aa->nz; 460 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 461 if (bs>1) { 462 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 463 mumps->val = mumps->val_alloc; 464 } else { 465 mumps->val = aa->a; 466 } 467 mumps->irn = row; 468 mumps->jcn = col; 469 } else { 470 if (bs == 1) mumps->val = aa->a; 471 row = mumps->irn; 472 col = mumps->jcn; 473 } 474 val = mumps->val; 475 476 nz = 0; 477 if (bs>1) { 478 for (i=0; i<mbs; i++) { 479 rnz = ai[i+1] - ai[i]; 480 ajj = aj + ai[i]; 481 for (j=0; j<rnz; j++) { 482 for (k=0; k<bs; k++) { 483 for (m=0; m<bs; m++) { 484 if (ajj[j]>i || k>=m) { 485 if (reuse == MAT_INITIAL_MATRIX) { 486 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 487 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 488 } 489 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 490 } 491 } 492 } 493 } 494 } 495 } else if (reuse == MAT_INITIAL_MATRIX) { 496 for (i=0; i<mbs; i++) { 497 rnz = ai[i+1] - ai[i]; 498 ajj = aj + ai[i]; 499 for (j=0; j<rnz; j++) { 500 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 501 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 502 nz++; 503 } 504 } 505 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 506 } 507 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 508 PetscFunctionReturn(0); 509 } 510 511 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 512 { 513 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 514 PetscInt64 nz,rnz,i,j; 515 const PetscScalar *av,*v1; 516 PetscScalar *val; 517 PetscErrorCode ierr; 518 PetscMUMPSInt *row,*col; 519 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 520 PetscBool missing; 521 #if defined(PETSC_USE_COMPLEX) 522 PetscBool hermitian; 523 #endif 524 525 PetscFunctionBegin; 526 #if defined(PETSC_USE_COMPLEX) 527 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 528 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 529 #endif 530 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 531 ai = aa->i; aj = aa->j; 532 adiag = aa->diag; 533 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr); 534 if (reuse == MAT_INITIAL_MATRIX) { 535 /* count nz in the upper triangular part of A */ 536 nz = 0; 537 if (missing) { 538 for (i=0; i<M; i++) { 539 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 540 for (j=ai[i];j<ai[i+1];j++) { 541 if (aj[j] < i) continue; 542 nz++; 543 } 544 } else { 545 nz += ai[i+1] - adiag[i]; 546 } 547 } 548 } else { 549 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 550 } 551 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 552 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 553 mumps->nnz = nz; 554 mumps->irn = row; 555 mumps->jcn = col; 556 mumps->val = mumps->val_alloc = val; 557 558 nz = 0; 559 if (missing) { 560 for (i=0; i<M; i++) { 561 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 562 for (j=ai[i];j<ai[i+1];j++) { 563 if (aj[j] < i) continue; 564 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 565 ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr); 566 val[nz] = av[j]; 567 nz++; 568 } 569 } else { 570 rnz = ai[i+1] - adiag[i]; 571 ajj = aj + adiag[i]; 572 v1 = av + adiag[i]; 573 for (j=0; j<rnz; j++) { 574 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 575 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 576 val[nz++] = v1[j]; 577 } 578 } 579 } 580 } else { 581 for (i=0; i<M; i++) { 582 rnz = ai[i+1] - adiag[i]; 583 ajj = aj + adiag[i]; 584 v1 = av + adiag[i]; 585 for (j=0; j<rnz; j++) { 586 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 587 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 588 val[nz++] = v1[j]; 589 } 590 } 591 } 592 } else { 593 nz = 0; 594 val = mumps->val; 595 if (missing) { 596 for (i=0; i <M; i++) { 597 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 598 for (j=ai[i];j<ai[i+1];j++) { 599 if (aj[j] < i) continue; 600 val[nz++] = av[j]; 601 } 602 } else { 603 rnz = ai[i+1] - adiag[i]; 604 v1 = av + adiag[i]; 605 for (j=0; j<rnz; j++) { 606 val[nz++] = v1[j]; 607 } 608 } 609 } 610 } else { 611 for (i=0; i <M; i++) { 612 rnz = ai[i+1] - adiag[i]; 613 v1 = av + adiag[i]; 614 for (j=0; j<rnz; j++) { 615 val[nz++] = v1[j]; 616 } 617 } 618 } 619 } 620 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 625 { 626 PetscErrorCode ierr; 627 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 628 PetscInt bs; 629 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 630 PetscMUMPSInt *row,*col; 631 const PetscScalar *av,*bv,*v1,*v2; 632 PetscScalar *val; 633 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 634 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 635 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 636 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 637 #if defined(PETSC_USE_COMPLEX) 638 PetscBool hermitian; 639 #endif 640 641 PetscFunctionBegin; 642 #if defined(PETSC_USE_COMPLEX) 643 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 644 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 645 #endif 646 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 647 rstart = A->rmap->rstart; 648 ai = aa->i; 649 aj = aa->j; 650 bi = bb->i; 651 bj = bb->j; 652 av = aa->a; 653 bv = bb->a; 654 655 garray = mat->garray; 656 657 if (reuse == MAT_INITIAL_MATRIX) { 658 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 659 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 660 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 661 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 662 mumps->irn = row; 663 mumps->jcn = col; 664 mumps->val = mumps->val_alloc = val; 665 } else { 666 val = mumps->val; 667 } 668 669 jj = 0; irow = rstart; 670 for (i=0; i<mbs; i++) { 671 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 672 countA = ai[i+1] - ai[i]; 673 countB = bi[i+1] - bi[i]; 674 bjj = bj + bi[i]; 675 v1 = av + ai[i]*bs2; 676 v2 = bv + bi[i]*bs2; 677 678 if (bs>1) { 679 /* A-part */ 680 for (j=0; j<countA; j++) { 681 for (k=0; k<bs; k++) { 682 for (m=0; m<bs; m++) { 683 if (rstart + ajj[j]*bs>irow || k>=m) { 684 if (reuse == MAT_INITIAL_MATRIX) { 685 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 686 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 687 } 688 val[jj++] = v1[j*bs2 + m + k*bs]; 689 } 690 } 691 } 692 } 693 694 /* B-part */ 695 for (j=0; j < countB; j++) { 696 for (k=0; k<bs; k++) { 697 for (m=0; m<bs; m++) { 698 if (reuse == MAT_INITIAL_MATRIX) { 699 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 700 ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 701 } 702 val[jj++] = v2[j*bs2 + m + k*bs]; 703 } 704 } 705 } 706 } else { 707 /* A-part */ 708 for (j=0; j<countA; j++) { 709 if (reuse == MAT_INITIAL_MATRIX) { 710 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 711 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 712 } 713 val[jj++] = v1[j]; 714 } 715 716 /* B-part */ 717 for (j=0; j < countB; j++) { 718 if (reuse == MAT_INITIAL_MATRIX) { 719 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 720 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 721 } 722 val[jj++] = v2[j]; 723 } 724 } 725 irow+=bs; 726 } 727 mumps->nnz = jj; 728 PetscFunctionReturn(0); 729 } 730 731 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 732 { 733 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 734 PetscErrorCode ierr; 735 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 736 PetscMUMPSInt *row,*col; 737 const PetscScalar *av, *bv,*v1,*v2; 738 PetscScalar *val; 739 Mat Ad,Ao; 740 Mat_SeqAIJ *aa; 741 Mat_SeqAIJ *bb; 742 743 PetscFunctionBegin; 744 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 745 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 746 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 747 748 aa = (Mat_SeqAIJ*)(Ad)->data; 749 bb = (Mat_SeqAIJ*)(Ao)->data; 750 ai = aa->i; 751 aj = aa->j; 752 bi = bb->i; 753 bj = bb->j; 754 755 rstart = A->rmap->rstart; 756 757 if (reuse == MAT_INITIAL_MATRIX) { 758 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 759 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 760 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 761 mumps->nnz = nz; 762 mumps->irn = row; 763 mumps->jcn = col; 764 mumps->val = mumps->val_alloc = val; 765 } else { 766 val = mumps->val; 767 } 768 769 jj = 0; irow = rstart; 770 for (i=0; i<m; i++) { 771 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 772 countA = ai[i+1] - ai[i]; 773 countB = bi[i+1] - bi[i]; 774 bjj = bj + bi[i]; 775 v1 = av + ai[i]; 776 v2 = bv + bi[i]; 777 778 /* A-part */ 779 for (j=0; j<countA; j++) { 780 if (reuse == MAT_INITIAL_MATRIX) { 781 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 782 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 783 } 784 val[jj++] = v1[j]; 785 } 786 787 /* B-part */ 788 for (j=0; j < countB; j++) { 789 if (reuse == MAT_INITIAL_MATRIX) { 790 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 791 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 792 } 793 val[jj++] = v2[j]; 794 } 795 irow++; 796 } 797 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 798 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 799 PetscFunctionReturn(0); 800 } 801 802 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 803 { 804 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 805 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 806 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 807 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 808 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 809 const PetscInt bs2=mat->bs2; 810 PetscErrorCode ierr; 811 PetscInt bs; 812 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 813 PetscMUMPSInt *row,*col; 814 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 815 PetscScalar *val; 816 817 PetscFunctionBegin; 818 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 819 if (reuse == MAT_INITIAL_MATRIX) { 820 nz = bs2*(aa->nz + bb->nz); 821 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 822 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 823 mumps->nnz = nz; 824 mumps->irn = row; 825 mumps->jcn = col; 826 mumps->val = mumps->val_alloc = val; 827 } else { 828 val = mumps->val; 829 } 830 831 jj = 0; irow = rstart; 832 for (i=0; i<mbs; i++) { 833 countA = ai[i+1] - ai[i]; 834 countB = bi[i+1] - bi[i]; 835 ajj = aj + ai[i]; 836 bjj = bj + bi[i]; 837 v1 = av + bs2*ai[i]; 838 v2 = bv + bs2*bi[i]; 839 840 idx = 0; 841 /* A-part */ 842 for (k=0; k<countA; k++) { 843 for (j=0; j<bs; j++) { 844 for (n=0; n<bs; n++) { 845 if (reuse == MAT_INITIAL_MATRIX) { 846 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 847 ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr); 848 } 849 val[jj++] = v1[idx++]; 850 } 851 } 852 } 853 854 idx = 0; 855 /* B-part */ 856 for (k=0; k<countB; k++) { 857 for (j=0; j<bs; j++) { 858 for (n=0; n<bs; n++) { 859 if (reuse == MAT_INITIAL_MATRIX) { 860 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 861 ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr); 862 } 863 val[jj++] = v2[idx++]; 864 } 865 } 866 } 867 irow += bs; 868 } 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 873 { 874 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 875 PetscErrorCode ierr; 876 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 877 PetscMUMPSInt *row,*col; 878 const PetscScalar *av, *bv,*v1,*v2; 879 PetscScalar *val; 880 Mat Ad,Ao; 881 Mat_SeqAIJ *aa; 882 Mat_SeqAIJ *bb; 883 #if defined(PETSC_USE_COMPLEX) 884 PetscBool hermitian; 885 #endif 886 887 PetscFunctionBegin; 888 #if defined(PETSC_USE_COMPLEX) 889 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 890 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 891 #endif 892 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 893 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 894 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 895 896 aa = (Mat_SeqAIJ*)(Ad)->data; 897 bb = (Mat_SeqAIJ*)(Ao)->data; 898 ai = aa->i; 899 aj = aa->j; 900 adiag = aa->diag; 901 bi = bb->i; 902 bj = bb->j; 903 904 rstart = A->rmap->rstart; 905 906 if (reuse == MAT_INITIAL_MATRIX) { 907 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 908 nzb = 0; /* num of upper triangular entries in mat->B */ 909 for (i=0; i<m; i++) { 910 nza += (ai[i+1] - adiag[i]); 911 countB = bi[i+1] - bi[i]; 912 bjj = bj + bi[i]; 913 for (j=0; j<countB; j++) { 914 if (garray[bjj[j]] > rstart) nzb++; 915 } 916 } 917 918 nz = nza + nzb; /* total nz of upper triangular part of mat */ 919 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 920 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 921 mumps->nnz = nz; 922 mumps->irn = row; 923 mumps->jcn = col; 924 mumps->val = mumps->val_alloc = val; 925 } else { 926 val = mumps->val; 927 } 928 929 jj = 0; irow = rstart; 930 for (i=0; i<m; i++) { 931 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 932 v1 = av + adiag[i]; 933 countA = ai[i+1] - adiag[i]; 934 countB = bi[i+1] - bi[i]; 935 bjj = bj + bi[i]; 936 v2 = bv + bi[i]; 937 938 /* A-part */ 939 for (j=0; j<countA; j++) { 940 if (reuse == MAT_INITIAL_MATRIX) { 941 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 942 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 943 } 944 val[jj++] = v1[j]; 945 } 946 947 /* B-part */ 948 for (j=0; j < countB; j++) { 949 if (garray[bjj[j]] > rstart) { 950 if (reuse == MAT_INITIAL_MATRIX) { 951 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 952 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 953 } 954 val[jj++] = v2[j]; 955 } 956 } 957 irow++; 958 } 959 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 960 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 PetscErrorCode MatDestroy_MUMPS(Mat A) 965 { 966 PetscErrorCode ierr; 967 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 968 969 PetscFunctionBegin; 970 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 971 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 972 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 973 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 974 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 975 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 976 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 977 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 978 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 979 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 980 mumps->id.job = JOB_END; 981 PetscMUMPS_c(mumps); 982 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 983 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 984 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 985 #endif 986 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 987 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 988 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 989 ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 990 ierr = PetscFree(A->data);CHKERRQ(ierr); 991 992 /* clear composed functions */ 993 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 994 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 995 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 996 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 997 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 998 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 999 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1000 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1001 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1002 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1003 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1004 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1005 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1010 { 1011 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1012 PetscScalar *array; 1013 Vec b_seq; 1014 IS is_iden,is_petsc; 1015 PetscErrorCode ierr; 1016 PetscInt i; 1017 PetscBool second_solve = PETSC_FALSE; 1018 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1019 1020 PetscFunctionBegin; 1021 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 1022 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 1023 1024 if (A->factorerrortype) { 1025 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1026 ierr = VecSetInf(x);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 mumps->id.ICNTL(20) = 0; /* dense RHS */ 1031 mumps->id.nrhs = 1; 1032 b_seq = mumps->b_seq; 1033 if (mumps->petsc_size > 1) { 1034 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 1035 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 1038 } else { /* petsc_size == 1 */ 1039 ierr = VecCopy(b,x);CHKERRQ(ierr); 1040 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 1041 } 1042 if (!mumps->myid) { /* define rhs on the host */ 1043 mumps->id.nrhs = 1; 1044 mumps->id.rhs = (MumpsScalar*)array; 1045 } 1046 1047 /* 1048 handle condensation step of Schur complement (if any) 1049 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1050 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1051 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1052 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1053 */ 1054 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1055 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1056 second_solve = PETSC_TRUE; 1057 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1058 } 1059 /* solve phase */ 1060 /*-------------*/ 1061 mumps->id.job = JOB_SOLVE; 1062 PetscMUMPS_c(mumps); 1063 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1064 1065 /* handle expansion step of Schur complement (if any) */ 1066 if (second_solve) { 1067 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1068 } 1069 1070 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1071 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1072 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1073 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1074 } 1075 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1076 PetscInt *isol2_loc=NULL; 1077 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 1078 ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr); 1079 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1080 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */ 1081 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 1082 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1083 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 1084 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1085 } 1086 1087 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 } 1090 1091 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 1092 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 1093 1094 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1099 { 1100 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 mumps->id.ICNTL(9) = 0; 1105 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1106 mumps->id.ICNTL(9) = 1; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1111 { 1112 PetscErrorCode ierr; 1113 Mat Bt = NULL; 1114 PetscBool denseX,denseB,flg,flgT; 1115 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1116 PetscInt i,nrhs,M; 1117 PetscScalar *array; 1118 const PetscScalar *rbray; 1119 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1120 PetscMUMPSInt *isol_loc,*isol_loc_save; 1121 PetscScalar *bray,*sol_loc,*sol_loc_save; 1122 IS is_to,is_from; 1123 PetscInt k,proc,j,m,myrstart; 1124 const PetscInt *rstart; 1125 Vec v_mpi,b_seq,msol_loc; 1126 VecScatter scat_rhs,scat_sol; 1127 PetscScalar *aa; 1128 PetscInt spnr,*ia,*ja; 1129 Mat_MPIAIJ *b = NULL; 1130 1131 PetscFunctionBegin; 1132 ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1133 if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1134 1135 ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1136 if (denseB) { 1137 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1138 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1139 } else { /* sparse B */ 1140 if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1141 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1142 if (flgT) { /* input B is transpose of actural RHS matrix, 1143 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1144 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1145 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1146 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1147 } 1148 1149 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1150 mumps->id.nrhs = nrhs; 1151 mumps->id.lrhs = M; 1152 mumps->id.rhs = NULL; 1153 1154 if (mumps->petsc_size == 1) { 1155 PetscScalar *aa; 1156 PetscInt spnr,*ia,*ja; 1157 PetscBool second_solve = PETSC_FALSE; 1158 1159 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1160 mumps->id.rhs = (MumpsScalar*)array; 1161 1162 if (denseB) { 1163 /* copy B to X */ 1164 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1165 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1166 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1167 } else { /* sparse B */ 1168 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1169 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1170 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1171 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1172 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1173 } 1174 /* handle condensation step of Schur complement (if any) */ 1175 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1176 second_solve = PETSC_TRUE; 1177 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1178 } 1179 /* solve phase */ 1180 /*-------------*/ 1181 mumps->id.job = JOB_SOLVE; 1182 PetscMUMPS_c(mumps); 1183 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1184 1185 /* handle expansion step of Schur complement (if any) */ 1186 if (second_solve) { 1187 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1188 } 1189 if (!denseB) { /* sparse B */ 1190 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1191 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1192 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1193 } 1194 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1199 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1200 1201 /* create msol_loc to hold mumps local solution */ 1202 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1203 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1204 1205 lsol_loc = mumps->id.lsol_loc; 1206 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1207 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1208 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1209 mumps->id.isol_loc = isol_loc; 1210 1211 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1212 1213 if (denseB) { /* dense B */ 1214 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1215 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1216 0, re-arrange B into desired order, which is a local operation. 1217 */ 1218 1219 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1220 /* wrap dense rhs matrix B into a vector v_mpi */ 1221 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1222 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1223 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1224 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1225 1226 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1227 if (!mumps->myid) { 1228 PetscInt *idx; 1229 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1230 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1231 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1232 k = 0; 1233 for (proc=0; proc<mumps->petsc_size; proc++){ 1234 for (j=0; j<nrhs; j++){ 1235 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1236 } 1237 } 1238 1239 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1240 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1241 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1242 } else { 1243 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1244 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1245 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1246 } 1247 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1248 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1250 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1251 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 1253 if (!mumps->myid) { /* define rhs on the host */ 1254 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1255 mumps->id.rhs = (MumpsScalar*)bray; 1256 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1257 } 1258 1259 } else { /* sparse B */ 1260 b = (Mat_MPIAIJ*)Bt->data; 1261 1262 /* wrap dense X into a vector v_mpi */ 1263 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1264 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1265 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1266 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1267 1268 if (!mumps->myid) { 1269 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1270 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1271 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1272 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1273 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1274 } else { 1275 mumps->id.irhs_ptr = NULL; 1276 mumps->id.irhs_sparse = NULL; 1277 mumps->id.nz_rhs = 0; 1278 mumps->id.rhs_sparse = NULL; 1279 } 1280 } 1281 1282 /* solve phase */ 1283 /*-------------*/ 1284 mumps->id.job = JOB_SOLVE; 1285 PetscMUMPS_c(mumps); 1286 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1287 1288 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1289 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1290 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1291 1292 /* create scatter scat_sol */ 1293 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1294 /* iidx: index for scatter mumps solution to petsc X */ 1295 1296 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1297 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1298 for (i=0; i<lsol_loc; i++) { 1299 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1300 1301 for (proc=0; proc<mumps->petsc_size; proc++){ 1302 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1303 myrstart = rstart[proc]; 1304 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1305 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1306 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1307 break; 1308 } 1309 } 1310 1311 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1312 } 1313 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1314 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1315 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1316 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1318 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1320 1321 /* free spaces */ 1322 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1323 mumps->id.isol_loc = isol_loc_save; 1324 1325 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1326 ierr = PetscFree(idxx);CHKERRQ(ierr); 1327 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1328 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1329 if (!denseB) { 1330 if (!mumps->myid) { 1331 b = (Mat_MPIAIJ*)Bt->data; 1332 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1333 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1334 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1335 } 1336 } else { 1337 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1338 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1339 } 1340 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1341 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1346 { 1347 PetscErrorCode ierr; 1348 PetscBool flg; 1349 Mat B; 1350 1351 PetscFunctionBegin; 1352 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1353 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1354 1355 /* Create B=Bt^T that uses Bt's data structure */ 1356 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1357 1358 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1359 ierr = MatDestroy(&B);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #if !defined(PETSC_USE_COMPLEX) 1364 /* 1365 input: 1366 F: numeric factor 1367 output: 1368 nneg: total number of negative pivots 1369 nzero: total number of zero pivots 1370 npos: (global dimension of F) - nneg - nzero 1371 */ 1372 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1373 { 1374 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1375 PetscErrorCode ierr; 1376 PetscMPIInt size; 1377 1378 PetscFunctionBegin; 1379 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1380 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1381 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1382 1383 if (nneg) *nneg = mumps->id.INFOG(12); 1384 if (nzero || npos) { 1385 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1386 if (nzero) *nzero = mumps->id.INFOG(28); 1387 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 #endif 1392 1393 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1394 { 1395 PetscErrorCode ierr; 1396 PetscInt i,nreqs; 1397 PetscMUMPSInt *irn,*jcn; 1398 PetscMPIInt count; 1399 PetscInt64 totnnz,remain; 1400 const PetscInt osize=mumps->omp_comm_size; 1401 PetscScalar *val; 1402 1403 PetscFunctionBegin; 1404 if (osize > 1) { 1405 if (reuse == MAT_INITIAL_MATRIX) { 1406 /* master first gathers counts of nonzeros to receive */ 1407 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1408 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr); 1409 1410 /* Then each computes number of send/recvs */ 1411 if (mumps->is_omp_master) { 1412 /* Start from 1 since self communication is not done in MPI */ 1413 nreqs = 0; 1414 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1415 } else { 1416 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1417 } 1418 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1419 1420 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1421 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1422 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1423 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1424 */ 1425 nreqs = 0; /* counter for actual send/recvs */ 1426 if (mumps->is_omp_master) { 1427 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1428 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1429 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1430 1431 /* Self communication */ 1432 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1433 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1434 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1435 1436 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1437 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1438 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1439 mumps->nnz = totnnz; 1440 mumps->irn = irn; 1441 mumps->jcn = jcn; 1442 mumps->val = mumps->val_alloc = val; 1443 1444 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1445 jcn += mumps->recvcount[0]; 1446 val += mumps->recvcount[0]; 1447 1448 /* Remote communication */ 1449 for (i=1; i<osize; i++) { 1450 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1451 remain = mumps->recvcount[i] - count; 1452 while (count>0) { 1453 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1454 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1455 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1456 irn += count; 1457 jcn += count; 1458 val += count; 1459 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1460 remain -= count; 1461 } 1462 } 1463 } else { 1464 irn = mumps->irn; 1465 jcn = mumps->jcn; 1466 val = mumps->val; 1467 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1468 remain = mumps->nnz - count; 1469 while (count>0) { 1470 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1471 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1472 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1473 irn += count; 1474 jcn += count; 1475 val += count; 1476 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1477 remain -= count; 1478 } 1479 } 1480 } else { 1481 nreqs = 0; 1482 if (mumps->is_omp_master) { 1483 val = mumps->val + mumps->recvcount[0]; 1484 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1485 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1486 remain = mumps->recvcount[i] - count; 1487 while (count>0) { 1488 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1489 val += count; 1490 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1491 remain -= count; 1492 } 1493 } 1494 } else { 1495 val = mumps->val; 1496 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1497 remain = mumps->nnz - count; 1498 while (count>0) { 1499 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1500 val += count; 1501 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1502 remain -= count; 1503 } 1504 } 1505 } 1506 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1507 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1513 { 1514 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1515 PetscErrorCode ierr; 1516 PetscBool isMPIAIJ; 1517 1518 PetscFunctionBegin; 1519 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1520 if (mumps->id.INFOG(1) == -6) { 1521 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1522 } 1523 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1528 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1529 1530 /* numerical factorization phase */ 1531 /*-------------------------------*/ 1532 mumps->id.job = JOB_FACTNUMERIC; 1533 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1534 if (!mumps->myid) { 1535 mumps->id.a = (MumpsScalar*)mumps->val; 1536 } 1537 } else { 1538 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1539 } 1540 PetscMUMPS_c(mumps); 1541 if (mumps->id.INFOG(1) < 0) { 1542 if (A->erroriffailure) { 1543 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1544 } else { 1545 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1546 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1547 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1548 } else if (mumps->id.INFOG(1) == -13) { 1549 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1550 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1551 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1552 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1553 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1554 } else { 1555 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1556 F->factorerrortype = MAT_FACTOR_OTHER; 1557 } 1558 } 1559 } 1560 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1561 1562 F->assembled = PETSC_TRUE; 1563 mumps->matstruc = SAME_NONZERO_PATTERN; 1564 if (F->schur) { /* reset Schur status to unfactored */ 1565 #if defined(PETSC_HAVE_CUDA) 1566 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1567 #endif 1568 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1569 mumps->id.ICNTL(19) = 2; 1570 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1571 } 1572 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1573 } 1574 1575 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1576 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1577 1578 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1579 if (mumps->petsc_size > 1) { 1580 PetscInt lsol_loc; 1581 PetscScalar *sol_loc; 1582 1583 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1584 1585 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1586 if (mumps->x_seq) { 1587 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1588 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1589 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1590 } 1591 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1592 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1593 mumps->id.lsol_loc = lsol_loc; 1594 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1595 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1596 } 1597 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 /* Sets MUMPS options from the options database */ 1602 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1603 { 1604 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1605 PetscErrorCode ierr; 1606 PetscMUMPSInt icntl=0; 1607 PetscInt info[80],i,ninfo=80; 1608 PetscBool flg=PETSC_FALSE; 1609 1610 PetscFunctionBegin; 1611 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1612 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1613 if (flg) mumps->id.ICNTL(1) = icntl; 1614 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1615 if (flg) mumps->id.ICNTL(2) = icntl; 1616 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1617 if (flg) mumps->id.ICNTL(3) = icntl; 1618 1619 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1620 if (flg) mumps->id.ICNTL(4) = icntl; 1621 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1622 1623 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1624 if (flg) mumps->id.ICNTL(6) = icntl; 1625 1626 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1627 if (flg) { 1628 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1629 else mumps->id.ICNTL(7) = icntl; 1630 } 1631 1632 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1633 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1634 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1635 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1636 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1637 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1638 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1639 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1640 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1641 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1642 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1643 } 1644 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1645 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1646 1647 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1648 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1649 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1650 if (mumps->id.ICNTL(24)) { 1651 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1652 } 1653 1654 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1655 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1656 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1657 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1658 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1659 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1660 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1661 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1662 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1663 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1664 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1665 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1666 1667 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1668 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1669 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1670 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1671 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1672 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1673 1674 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1675 1676 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1677 if (ninfo) { 1678 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1679 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1680 mumps->ninfo = ninfo; 1681 for (i=0; i<ninfo; i++) { 1682 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1683 else mumps->info[i] = info[i]; 1684 } 1685 } 1686 1687 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1692 { 1693 PetscErrorCode ierr; 1694 PetscInt nthreads=0; 1695 1696 PetscFunctionBegin; 1697 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1698 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1699 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1700 1701 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1702 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1703 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1704 if (mumps->use_petsc_omp_support) { 1705 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1706 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1707 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1708 #else 1709 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1710 #endif 1711 } else { 1712 mumps->omp_comm = PETSC_COMM_SELF; 1713 mumps->mumps_comm = mumps->petsc_comm; 1714 mumps->is_omp_master = PETSC_TRUE; 1715 } 1716 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1717 mumps->reqs = NULL; 1718 mumps->tag = 0; 1719 1720 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1721 mumps->id.job = JOB_INIT; 1722 mumps->id.par = 1; /* host participates factorizaton and solve */ 1723 mumps->id.sym = mumps->sym; 1724 1725 PetscMUMPS_c(mumps); 1726 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1727 1728 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1729 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1730 */ 1731 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1732 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1733 1734 mumps->scat_rhs = NULL; 1735 mumps->scat_sol = NULL; 1736 1737 /* set PETSc-MUMPS default options - override MUMPS default */ 1738 mumps->id.ICNTL(3) = 0; 1739 mumps->id.ICNTL(4) = 0; 1740 if (mumps->petsc_size == 1) { 1741 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1742 } else { 1743 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1744 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1745 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1746 } 1747 1748 /* schur */ 1749 mumps->id.size_schur = 0; 1750 mumps->id.listvar_schur = NULL; 1751 mumps->id.schur = NULL; 1752 mumps->sizeredrhs = 0; 1753 mumps->schur_sol = NULL; 1754 mumps->schur_sizesol = 0; 1755 PetscFunctionReturn(0); 1756 } 1757 1758 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1759 { 1760 PetscErrorCode ierr; 1761 1762 PetscFunctionBegin; 1763 if (mumps->id.INFOG(1) < 0) { 1764 if (A->erroriffailure) { 1765 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1766 } else { 1767 if (mumps->id.INFOG(1) == -6) { 1768 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1769 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1770 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1771 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1772 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1773 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1774 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1775 } else { 1776 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1777 F->factorerrortype = MAT_FACTOR_OTHER; 1778 } 1779 } 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1785 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1786 { 1787 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1788 PetscErrorCode ierr; 1789 Vec b; 1790 const PetscInt M = A->rmap->N; 1791 1792 PetscFunctionBegin; 1793 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1794 1795 /* Set MUMPS options from the options database */ 1796 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1797 1798 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1799 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1800 1801 /* analysis phase */ 1802 /*----------------*/ 1803 mumps->id.job = JOB_FACTSYMBOLIC; 1804 mumps->id.n = M; 1805 switch (mumps->id.ICNTL(18)) { 1806 case 0: /* centralized assembled matrix input */ 1807 if (!mumps->myid) { 1808 mumps->id.nnz = mumps->nnz; 1809 mumps->id.irn = mumps->irn; 1810 mumps->id.jcn = mumps->jcn; 1811 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1812 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1813 /* 1814 PetscBool flag; 1815 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1816 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1817 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1818 */ 1819 if (!mumps->myid) { 1820 const PetscInt *idx; 1821 PetscInt i; 1822 1823 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1824 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1825 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1826 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1827 } 1828 } 1829 } 1830 break; 1831 case 3: /* distributed assembled matrix input (size>1) */ 1832 mumps->id.nnz_loc = mumps->nnz; 1833 mumps->id.irn_loc = mumps->irn; 1834 mumps->id.jcn_loc = mumps->jcn; 1835 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1836 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1837 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1838 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1839 ierr = VecDestroy(&b);CHKERRQ(ierr); 1840 break; 1841 } 1842 PetscMUMPS_c(mumps); 1843 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1844 1845 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1846 F->ops->solve = MatSolve_MUMPS; 1847 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1848 F->ops->matsolve = MatMatSolve_MUMPS; 1849 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1850 PetscFunctionReturn(0); 1851 } 1852 1853 /* Note the Petsc r and c permutations are ignored */ 1854 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1855 { 1856 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1857 PetscErrorCode ierr; 1858 Vec b; 1859 const PetscInt M = A->rmap->N; 1860 1861 PetscFunctionBegin; 1862 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1863 1864 /* Set MUMPS options from the options database */ 1865 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1866 1867 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1868 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1869 1870 /* analysis phase */ 1871 /*----------------*/ 1872 mumps->id.job = JOB_FACTSYMBOLIC; 1873 mumps->id.n = M; 1874 switch (mumps->id.ICNTL(18)) { 1875 case 0: /* centralized assembled matrix input */ 1876 if (!mumps->myid) { 1877 mumps->id.nnz = mumps->nnz; 1878 mumps->id.irn = mumps->irn; 1879 mumps->id.jcn = mumps->jcn; 1880 if (mumps->id.ICNTL(6)>1) { 1881 mumps->id.a = (MumpsScalar*)mumps->val; 1882 } 1883 } 1884 break; 1885 case 3: /* distributed assembled matrix input (size>1) */ 1886 mumps->id.nnz_loc = mumps->nnz; 1887 mumps->id.irn_loc = mumps->irn; 1888 mumps->id.jcn_loc = mumps->jcn; 1889 if (mumps->id.ICNTL(6)>1) { 1890 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1891 } 1892 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1893 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1894 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1895 ierr = VecDestroy(&b);CHKERRQ(ierr); 1896 break; 1897 } 1898 PetscMUMPS_c(mumps); 1899 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1900 1901 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1902 F->ops->solve = MatSolve_MUMPS; 1903 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1904 PetscFunctionReturn(0); 1905 } 1906 1907 /* Note the Petsc r permutation and factor info are ignored */ 1908 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1909 { 1910 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1911 PetscErrorCode ierr; 1912 Vec b; 1913 const PetscInt M = A->rmap->N; 1914 1915 PetscFunctionBegin; 1916 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1917 1918 /* Set MUMPS options from the options database */ 1919 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1920 1921 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1922 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1923 1924 /* analysis phase */ 1925 /*----------------*/ 1926 mumps->id.job = JOB_FACTSYMBOLIC; 1927 mumps->id.n = M; 1928 switch (mumps->id.ICNTL(18)) { 1929 case 0: /* centralized assembled matrix input */ 1930 if (!mumps->myid) { 1931 mumps->id.nnz = mumps->nnz; 1932 mumps->id.irn = mumps->irn; 1933 mumps->id.jcn = mumps->jcn; 1934 if (mumps->id.ICNTL(6)>1) { 1935 mumps->id.a = (MumpsScalar*)mumps->val; 1936 } 1937 } 1938 break; 1939 case 3: /* distributed assembled matrix input (size>1) */ 1940 mumps->id.nnz_loc = mumps->nnz; 1941 mumps->id.irn_loc = mumps->irn; 1942 mumps->id.jcn_loc = mumps->jcn; 1943 if (mumps->id.ICNTL(6)>1) { 1944 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1945 } 1946 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1947 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1948 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1949 ierr = VecDestroy(&b);CHKERRQ(ierr); 1950 break; 1951 } 1952 PetscMUMPS_c(mumps); 1953 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1954 1955 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1956 F->ops->solve = MatSolve_MUMPS; 1957 F->ops->solvetranspose = MatSolve_MUMPS; 1958 F->ops->matsolve = MatMatSolve_MUMPS; 1959 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1960 #if defined(PETSC_USE_COMPLEX) 1961 F->ops->getinertia = NULL; 1962 #else 1963 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1964 #endif 1965 PetscFunctionReturn(0); 1966 } 1967 1968 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1969 { 1970 PetscErrorCode ierr; 1971 PetscBool iascii; 1972 PetscViewerFormat format; 1973 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1974 1975 PetscFunctionBegin; 1976 /* check if matrix is mumps type */ 1977 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1978 1979 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1980 if (iascii) { 1981 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1982 if (format == PETSC_VIEWER_ASCII_INFO) { 1983 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1984 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1985 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1986 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1987 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1988 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1989 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1990 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1991 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1992 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1993 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1994 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1995 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1996 if (mumps->id.ICNTL(11)>0) { 1997 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1998 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1999 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2000 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2001 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2002 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2003 } 2004 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2005 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2006 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2007 /* ICNTL(15-17) not used */ 2008 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2009 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2010 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2011 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2012 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2013 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2014 2015 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2016 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2017 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2018 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2019 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2020 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2021 2022 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2023 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2024 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2025 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2026 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2027 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2028 2029 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2030 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2031 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2032 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2033 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2034 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2035 2036 /* infomation local to each processor */ 2037 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2038 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2039 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2040 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2041 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2042 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2043 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2044 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2045 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2046 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2047 2048 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2049 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2050 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2051 2052 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2053 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2054 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2055 2056 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2057 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2058 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2059 2060 if (mumps->ninfo && mumps->ninfo <= 80){ 2061 PetscInt i; 2062 for (i=0; i<mumps->ninfo; i++){ 2063 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2064 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2065 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2066 } 2067 } 2068 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2069 2070 if (!mumps->myid) { /* information from the host */ 2071 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2072 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2073 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2074 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2075 2076 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 2077 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 2078 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 2079 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 2080 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 2081 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 2082 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 2083 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 2084 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 2085 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 2086 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 2087 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 2088 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 2089 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 2090 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 2091 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 2092 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 2093 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 2094 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 2095 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 2096 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 2097 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 2098 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 2099 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2100 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2101 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2102 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2103 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2104 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2105 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2106 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 2107 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 2108 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 2109 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 2110 } 2111 } 2112 } 2113 PetscFunctionReturn(0); 2114 } 2115 2116 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2117 { 2118 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2119 2120 PetscFunctionBegin; 2121 info->block_size = 1.0; 2122 info->nz_allocated = mumps->id.INFOG(20); 2123 info->nz_used = mumps->id.INFOG(20); 2124 info->nz_unneeded = 0.0; 2125 info->assemblies = 0.0; 2126 info->mallocs = 0.0; 2127 info->memory = 0.0; 2128 info->fill_ratio_given = 0; 2129 info->fill_ratio_needed = 0; 2130 info->factor_mallocs = 0; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /* -------------------------------------------------------------------------------------------*/ 2135 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2136 { 2137 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2138 const PetscScalar *arr; 2139 const PetscInt *idxs; 2140 PetscInt size,i; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2145 if (mumps->petsc_size > 1) { 2146 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2147 2148 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2149 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 2150 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 2151 } 2152 2153 /* Schur complement matrix */ 2154 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2156 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2157 mumps->id.schur = (MumpsScalar*)arr; 2158 mumps->id.size_schur = size; 2159 mumps->id.schur_lld = size; 2160 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2161 if (mumps->sym == 1) { 2162 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2163 } 2164 2165 /* MUMPS expects Fortran style indices */ 2166 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2167 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2168 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2169 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2170 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2171 if (mumps->petsc_size > 1) { 2172 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2173 } else { 2174 if (F->factortype == MAT_FACTOR_LU) { 2175 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2176 } else { 2177 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2178 } 2179 } 2180 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2181 mumps->id.ICNTL(26) = -1; 2182 PetscFunctionReturn(0); 2183 } 2184 2185 /* -------------------------------------------------------------------------------------------*/ 2186 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2187 { 2188 Mat St; 2189 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2190 PetscScalar *array; 2191 #if defined(PETSC_USE_COMPLEX) 2192 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2193 #endif 2194 PetscErrorCode ierr; 2195 2196 PetscFunctionBegin; 2197 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2198 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2199 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2200 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2201 ierr = MatSetUp(St);CHKERRQ(ierr); 2202 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2203 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2204 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2205 PetscInt i,j,N=mumps->id.size_schur; 2206 for (i=0;i<N;i++) { 2207 for (j=0;j<N;j++) { 2208 #if !defined(PETSC_USE_COMPLEX) 2209 PetscScalar val = mumps->id.schur[i*N+j]; 2210 #else 2211 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2212 #endif 2213 array[j*N+i] = val; 2214 } 2215 } 2216 } else { /* stored by columns */ 2217 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2218 } 2219 } else { /* either full or lower-triangular (not packed) */ 2220 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2221 PetscInt i,j,N=mumps->id.size_schur; 2222 for (i=0;i<N;i++) { 2223 for (j=i;j<N;j++) { 2224 #if !defined(PETSC_USE_COMPLEX) 2225 PetscScalar val = mumps->id.schur[i*N+j]; 2226 #else 2227 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2228 #endif 2229 array[i*N+j] = val; 2230 array[j*N+i] = val; 2231 } 2232 } 2233 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2234 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2235 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2236 PetscInt i,j,N=mumps->id.size_schur; 2237 for (i=0;i<N;i++) { 2238 for (j=0;j<i+1;j++) { 2239 #if !defined(PETSC_USE_COMPLEX) 2240 PetscScalar val = mumps->id.schur[i*N+j]; 2241 #else 2242 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2243 #endif 2244 array[i*N+j] = val; 2245 array[j*N+i] = val; 2246 } 2247 } 2248 } 2249 } 2250 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2251 *S = St; 2252 PetscFunctionReturn(0); 2253 } 2254 2255 /* -------------------------------------------------------------------------------------------*/ 2256 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2257 { 2258 PetscErrorCode ierr; 2259 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2260 2261 PetscFunctionBegin; 2262 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2267 { 2268 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2269 2270 PetscFunctionBegin; 2271 *ival = mumps->id.ICNTL(icntl); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 /*@ 2276 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2277 2278 Logically Collective on Mat 2279 2280 Input Parameters: 2281 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2282 . icntl - index of MUMPS parameter array ICNTL() 2283 - ival - value of MUMPS ICNTL(icntl) 2284 2285 Options Database: 2286 . -mat_mumps_icntl_<icntl> <ival> 2287 2288 Level: beginner 2289 2290 References: 2291 . MUMPS Users' Guide 2292 2293 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2294 @*/ 2295 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2296 { 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 PetscValidType(F,1); 2301 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2302 PetscValidLogicalCollectiveInt(F,icntl,2); 2303 PetscValidLogicalCollectiveInt(F,ival,3); 2304 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 /*@ 2309 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2310 2311 Logically Collective on Mat 2312 2313 Input Parameters: 2314 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2315 - icntl - index of MUMPS parameter array ICNTL() 2316 2317 Output Parameter: 2318 . ival - value of MUMPS ICNTL(icntl) 2319 2320 Level: beginner 2321 2322 References: 2323 . MUMPS Users' Guide 2324 2325 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2326 @*/ 2327 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2328 { 2329 PetscErrorCode ierr; 2330 2331 PetscFunctionBegin; 2332 PetscValidType(F,1); 2333 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2334 PetscValidLogicalCollectiveInt(F,icntl,2); 2335 PetscValidIntPointer(ival,3); 2336 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 /* -------------------------------------------------------------------------------------------*/ 2341 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2342 { 2343 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2344 2345 PetscFunctionBegin; 2346 mumps->id.CNTL(icntl) = val; 2347 PetscFunctionReturn(0); 2348 } 2349 2350 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2351 { 2352 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2353 2354 PetscFunctionBegin; 2355 *val = mumps->id.CNTL(icntl); 2356 PetscFunctionReturn(0); 2357 } 2358 2359 /*@ 2360 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2361 2362 Logically Collective on Mat 2363 2364 Input Parameters: 2365 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2366 . icntl - index of MUMPS parameter array CNTL() 2367 - val - value of MUMPS CNTL(icntl) 2368 2369 Options Database: 2370 . -mat_mumps_cntl_<icntl> <val> 2371 2372 Level: beginner 2373 2374 References: 2375 . MUMPS Users' Guide 2376 2377 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2378 @*/ 2379 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2380 { 2381 PetscErrorCode ierr; 2382 2383 PetscFunctionBegin; 2384 PetscValidType(F,1); 2385 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2386 PetscValidLogicalCollectiveInt(F,icntl,2); 2387 PetscValidLogicalCollectiveReal(F,val,3); 2388 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 /*@ 2393 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2394 2395 Logically Collective on Mat 2396 2397 Input Parameters: 2398 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2399 - icntl - index of MUMPS parameter array CNTL() 2400 2401 Output Parameter: 2402 . val - value of MUMPS CNTL(icntl) 2403 2404 Level: beginner 2405 2406 References: 2407 . MUMPS Users' Guide 2408 2409 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2410 @*/ 2411 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2412 { 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 PetscValidType(F,1); 2417 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2418 PetscValidLogicalCollectiveInt(F,icntl,2); 2419 PetscValidRealPointer(val,3); 2420 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2425 { 2426 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2427 2428 PetscFunctionBegin; 2429 *info = mumps->id.INFO(icntl); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2434 { 2435 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2436 2437 PetscFunctionBegin; 2438 *infog = mumps->id.INFOG(icntl); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2443 { 2444 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2445 2446 PetscFunctionBegin; 2447 *rinfo = mumps->id.RINFO(icntl); 2448 PetscFunctionReturn(0); 2449 } 2450 2451 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2452 { 2453 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2454 2455 PetscFunctionBegin; 2456 *rinfog = mumps->id.RINFOG(icntl); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2461 { 2462 PetscErrorCode ierr; 2463 Mat Bt = NULL,Btseq = NULL; 2464 PetscBool flg; 2465 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2466 PetscScalar *aa; 2467 PetscInt spnr,*ia,*ja,M,nrhs; 2468 2469 PetscFunctionBegin; 2470 PetscValidIntPointer(spRHS,2); 2471 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2472 if (flg) { 2473 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2474 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2475 2476 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2477 2478 if (mumps->petsc_size > 1) { 2479 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2480 Btseq = b->A; 2481 } else { 2482 Btseq = Bt; 2483 } 2484 2485 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2486 mumps->id.nrhs = nrhs; 2487 mumps->id.lrhs = M; 2488 mumps->id.rhs = NULL; 2489 2490 if (!mumps->myid) { 2491 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2492 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2493 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2494 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2495 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2496 } else { 2497 mumps->id.irhs_ptr = NULL; 2498 mumps->id.irhs_sparse = NULL; 2499 mumps->id.nz_rhs = 0; 2500 mumps->id.rhs_sparse = NULL; 2501 } 2502 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2503 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2504 2505 /* solve phase */ 2506 /*-------------*/ 2507 mumps->id.job = JOB_SOLVE; 2508 PetscMUMPS_c(mumps); 2509 if (mumps->id.INFOG(1) < 0) 2510 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2511 2512 if (!mumps->myid) { 2513 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2514 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2515 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2516 } 2517 PetscFunctionReturn(0); 2518 } 2519 2520 /*@ 2521 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2522 2523 Logically Collective on Mat 2524 2525 Input Parameters: 2526 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2527 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2528 2529 Output Parameter: 2530 . spRHS - requested entries of inverse of A 2531 2532 Level: beginner 2533 2534 References: 2535 . MUMPS Users' Guide 2536 2537 .seealso: MatGetFactor(), MatCreateTranspose() 2538 @*/ 2539 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2540 { 2541 PetscErrorCode ierr; 2542 2543 PetscFunctionBegin; 2544 PetscValidType(F,1); 2545 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2546 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2551 { 2552 PetscErrorCode ierr; 2553 Mat spRHS; 2554 2555 PetscFunctionBegin; 2556 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2557 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2558 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 /*@ 2563 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2564 2565 Logically Collective on Mat 2566 2567 Input Parameters: 2568 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2569 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2570 2571 Output Parameter: 2572 . spRHST - requested entries of inverse of A^T 2573 2574 Level: beginner 2575 2576 References: 2577 . MUMPS Users' Guide 2578 2579 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2580 @*/ 2581 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2582 { 2583 PetscErrorCode ierr; 2584 PetscBool flg; 2585 2586 PetscFunctionBegin; 2587 PetscValidType(F,1); 2588 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2589 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2590 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2591 2592 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 /*@ 2597 MatMumpsGetInfo - Get MUMPS parameter INFO() 2598 2599 Logically Collective on Mat 2600 2601 Input Parameters: 2602 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2603 - icntl - index of MUMPS parameter array INFO() 2604 2605 Output Parameter: 2606 . ival - value of MUMPS INFO(icntl) 2607 2608 Level: beginner 2609 2610 References: 2611 . MUMPS Users' Guide 2612 2613 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2614 @*/ 2615 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2616 { 2617 PetscErrorCode ierr; 2618 2619 PetscFunctionBegin; 2620 PetscValidType(F,1); 2621 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2622 PetscValidIntPointer(ival,3); 2623 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 /*@ 2628 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2629 2630 Logically Collective on Mat 2631 2632 Input Parameters: 2633 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2634 - icntl - index of MUMPS parameter array INFOG() 2635 2636 Output Parameter: 2637 . ival - value of MUMPS INFOG(icntl) 2638 2639 Level: beginner 2640 2641 References: 2642 . MUMPS Users' Guide 2643 2644 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2645 @*/ 2646 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2647 { 2648 PetscErrorCode ierr; 2649 2650 PetscFunctionBegin; 2651 PetscValidType(F,1); 2652 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2653 PetscValidIntPointer(ival,3); 2654 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 /*@ 2659 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2660 2661 Logically Collective on Mat 2662 2663 Input Parameters: 2664 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2665 - icntl - index of MUMPS parameter array RINFO() 2666 2667 Output Parameter: 2668 . val - value of MUMPS RINFO(icntl) 2669 2670 Level: beginner 2671 2672 References: 2673 . MUMPS Users' Guide 2674 2675 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2676 @*/ 2677 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2678 { 2679 PetscErrorCode ierr; 2680 2681 PetscFunctionBegin; 2682 PetscValidType(F,1); 2683 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2684 PetscValidRealPointer(val,3); 2685 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2686 PetscFunctionReturn(0); 2687 } 2688 2689 /*@ 2690 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2691 2692 Logically Collective on Mat 2693 2694 Input Parameters: 2695 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2696 - icntl - index of MUMPS parameter array RINFOG() 2697 2698 Output Parameter: 2699 . val - value of MUMPS RINFOG(icntl) 2700 2701 Level: beginner 2702 2703 References: 2704 . MUMPS Users' Guide 2705 2706 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2707 @*/ 2708 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2709 { 2710 PetscErrorCode ierr; 2711 2712 PetscFunctionBegin; 2713 PetscValidType(F,1); 2714 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2715 PetscValidRealPointer(val,3); 2716 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2717 PetscFunctionReturn(0); 2718 } 2719 2720 /*MC 2721 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2722 distributed and sequential matrices via the external package MUMPS. 2723 2724 Works with MATAIJ and MATSBAIJ matrices 2725 2726 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2727 2728 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2729 2730 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2731 2732 Options Database Keys: 2733 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2734 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2735 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2736 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2737 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2738 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2739 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2740 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2741 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2742 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2743 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2744 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2745 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2746 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2747 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2748 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2749 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2750 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2751 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2752 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2753 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2754 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2755 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2756 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2757 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2758 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2759 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2760 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2761 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2762 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2763 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2764 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2765 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2766 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2767 2768 Level: beginner 2769 2770 Notes: 2771 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2772 2773 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2774 $ KSPGetPC(ksp,&pc); 2775 $ PCFactorGetMatrix(pc,&mat); 2776 $ MatMumpsGetInfo(mat,....); 2777 $ MatMumpsGetInfog(mat,....); etc. 2778 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2779 2780 Two modes to run MUMPS/PETSc with OpenMP 2781 2782 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2783 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2784 2785 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2786 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2787 2788 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2789 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2790 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2791 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2792 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2793 2794 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2795 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2796 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2797 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2798 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2799 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2800 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2801 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2802 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2803 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2804 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2805 examine the mapping result. 2806 2807 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2808 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2809 calls omp_set_num_threads(m) internally before calling MUMPS. 2810 2811 References: 2812 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2813 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2814 2815 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2816 2817 M*/ 2818 2819 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2820 { 2821 PetscFunctionBegin; 2822 *type = MATSOLVERMUMPS; 2823 PetscFunctionReturn(0); 2824 } 2825 2826 /* MatGetFactor for Seq and MPI AIJ matrices */ 2827 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2828 { 2829 Mat B; 2830 PetscErrorCode ierr; 2831 Mat_MUMPS *mumps; 2832 PetscBool isSeqAIJ; 2833 2834 PetscFunctionBegin; 2835 #if defined(PETSC_USE_COMPLEX) 2836 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2837 #endif 2838 /* Create the factorization matrix */ 2839 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2840 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2841 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2842 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2843 ierr = MatSetUp(B);CHKERRQ(ierr); 2844 2845 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2846 2847 B->ops->view = MatView_MUMPS; 2848 B->ops->getinfo = MatGetInfo_MUMPS; 2849 2850 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2851 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2852 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2853 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2854 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2855 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2856 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2857 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2858 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2859 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2860 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2861 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2862 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2863 2864 if (ftype == MAT_FACTOR_LU) { 2865 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2866 B->factortype = MAT_FACTOR_LU; 2867 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2868 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2869 mumps->sym = 0; 2870 } else { 2871 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2872 B->factortype = MAT_FACTOR_CHOLESKY; 2873 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2874 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2875 #if defined(PETSC_USE_COMPLEX) 2876 mumps->sym = 2; 2877 #else 2878 if (A->spd_set && A->spd) mumps->sym = 1; 2879 else mumps->sym = 2; 2880 #endif 2881 } 2882 2883 /* set solvertype */ 2884 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2885 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2886 2887 B->ops->destroy = MatDestroy_MUMPS; 2888 B->data = (void*)mumps; 2889 2890 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2891 2892 *F = B; 2893 PetscFunctionReturn(0); 2894 } 2895 2896 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2897 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2898 { 2899 Mat B; 2900 PetscErrorCode ierr; 2901 Mat_MUMPS *mumps; 2902 PetscBool isSeqSBAIJ; 2903 2904 PetscFunctionBegin; 2905 #if defined(PETSC_USE_COMPLEX) 2906 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2907 #endif 2908 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2909 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2910 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2911 ierr = MatSetUp(B);CHKERRQ(ierr); 2912 2913 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2914 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2915 if (isSeqSBAIJ) { 2916 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2917 } else { 2918 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2919 } 2920 2921 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2922 B->ops->view = MatView_MUMPS; 2923 B->ops->getinfo = MatGetInfo_MUMPS; 2924 2925 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2926 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2927 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2928 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2929 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2930 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2931 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2932 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2933 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2934 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2935 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2936 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2937 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2938 2939 B->factortype = MAT_FACTOR_CHOLESKY; 2940 #if defined(PETSC_USE_COMPLEX) 2941 mumps->sym = 2; 2942 #else 2943 if (A->spd_set && A->spd) mumps->sym = 1; 2944 else mumps->sym = 2; 2945 #endif 2946 2947 /* set solvertype */ 2948 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2949 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2950 2951 B->ops->destroy = MatDestroy_MUMPS; 2952 B->data = (void*)mumps; 2953 2954 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2955 2956 *F = B; 2957 PetscFunctionReturn(0); 2958 } 2959 2960 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2961 { 2962 Mat B; 2963 PetscErrorCode ierr; 2964 Mat_MUMPS *mumps; 2965 PetscBool isSeqBAIJ; 2966 2967 PetscFunctionBegin; 2968 /* Create the factorization matrix */ 2969 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2970 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2971 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2972 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2973 ierr = MatSetUp(B);CHKERRQ(ierr); 2974 2975 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2976 if (ftype == MAT_FACTOR_LU) { 2977 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2978 B->factortype = MAT_FACTOR_LU; 2979 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2980 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2981 mumps->sym = 0; 2982 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2983 2984 B->ops->view = MatView_MUMPS; 2985 B->ops->getinfo = MatGetInfo_MUMPS; 2986 2987 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2988 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2989 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2990 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2991 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2992 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2998 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3000 3001 /* set solvertype */ 3002 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3003 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3004 3005 B->ops->destroy = MatDestroy_MUMPS; 3006 B->data = (void*)mumps; 3007 3008 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3009 3010 *F = B; 3011 PetscFunctionReturn(0); 3012 } 3013 3014 /* MatGetFactor for Seq and MPI SELL matrices */ 3015 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3016 { 3017 Mat B; 3018 PetscErrorCode ierr; 3019 Mat_MUMPS *mumps; 3020 PetscBool isSeqSELL; 3021 3022 PetscFunctionBegin; 3023 /* Create the factorization matrix */ 3024 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3025 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3026 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3027 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3028 ierr = MatSetUp(B);CHKERRQ(ierr); 3029 3030 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3031 3032 B->ops->view = MatView_MUMPS; 3033 B->ops->getinfo = MatGetInfo_MUMPS; 3034 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3046 3047 if (ftype == MAT_FACTOR_LU) { 3048 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3049 B->factortype = MAT_FACTOR_LU; 3050 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3051 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3052 mumps->sym = 0; 3053 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3054 3055 /* set solvertype */ 3056 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3057 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3058 3059 B->ops->destroy = MatDestroy_MUMPS; 3060 B->data = (void*)mumps; 3061 3062 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3063 3064 *F = B; 3065 PetscFunctionReturn(0); 3066 } 3067 3068 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3069 { 3070 PetscErrorCode ierr; 3071 3072 PetscFunctionBegin; 3073 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3074 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3075 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3076 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3077 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3078 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3079 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3080 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3081 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3082 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3083 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3084 PetscFunctionReturn(0); 3085 } 3086 3087