#define PETSCMAT_DLL

/* 
    Provides an interface to the MUMPS sparse solver
*/
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

/* MUMPS ships one C interface per scalar type; select the one matching PETSc's scalar */
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include "zmumps_c.h"
#else
#include "dmumps_c.h"
#endif
EXTERN_C_END

/* MUMPS job codes: initialize / terminate an instance */
#define JOB_INIT -1
#define JOB_END  -2

/* macros s.t. indices match MUMPS documentation (MUMPS counts from 1, C arrays from 0) */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

/* Private data hung off A->spptr for matrices of type *MUMPS */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;                /* the MUMPS instance (complex scalars) */
#else
  DMUMPS_STRUC_C id;                /* the MUMPS instance (real scalars) */
#endif
  MatStructure matstruc;            /* DIFFERENT_NONZERO_PATTERN forces a fresh analysis phase */
  PetscMPIInt  myid,size;           /* rank and size of the matrix communicator */
  PetscInt     *irn,*jcn,sym,nSolve;/* triple row/col indices; symmetry flag; #solves since last factor */
  PetscScalar  *val;                /* matrix values in triple format */
  MPI_Comm     comm_mumps;          /* duplicated communicator handed to MUMPS */
  VecScatter   scat_rhs, scat_sol;  /* scatters: centralized rhs in, distributed solution out */
  PetscTruth   isAIJ,CleanUpMUMPS;  /* matrix flavor (AIJ vs SBAIJ); whether MUMPS state needs tear-down */
  Vec          b_seq,x_seq;         /* sequential work vectors for rhs / solution */
  /* methods of the wrapped matrix type, saved at conversion and restored on conversion back */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;

EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
EXTERN_C_BEGIN
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END
/* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
/*
 input:
   A       - matrix in mpiaij or mpisbaij (bs=1) format
   shift   - 0: C style output triple; 1: Fortran style output triple.
   valOnly - FALSE: spaces are allocated and values are set for the triple
             TRUE:  only the values in v array are updated
 output:
   nnz     - dim of r, c, and v (number of local nonzero entries of A)
   r, c, v - row and col index, matrix values (matrix triples)
 */
PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
  PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
  PetscErrorCode ierr;
  PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
  PetscInt       *row,*col;
  PetscScalar    *av, *bv,*val;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  /* Grab CSR arrays of the diagonal (A) and off-diagonal (B) local blocks */
  if (mumps->isAIJ){
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
    Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;  /* maps B's compact local col indices to global col indices */
    av=aa->a; bv=bb->a;
  } else {
    Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
    Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
    Mat_SeqBAIJ  *bb=(Mat_SeqBAIJ*)(mat->B)->data;
    if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  }

  /* First call allocates the triple arrays; later calls only refresh the values */
  if (!valOnly){
    ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }
  *nnz = nz;

  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];

    /* get jB, the starting local col index for the 2nd B-part */
    /* NOTE(review): ajj[0] is read even when countA==0 (empty A-part row) — verify rows
       always have a diagonal-block entry, or this reads past the row */
    colA_start = rstart + ajj[0]; /* the smallest col index for A */
    j=-1;
    do {
      j++;
      if (j == countB) break;
      jcol = garray[bjj[j]];
    } while (jcol < colA_start);
    jB = j;  /* entries bjj[0..jB-1] have global col index below A's first col */

    /* B-part, smaller col index */
    colA_start = rstart + ajj[0]; /* the smallest col index for A (recomputed; same value as above) */
    for (j=0; j<jB; j++){
      jcol = garray[bjj[j]];
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = jcol + shift;
      }
      val[jj++] = *bv++;
    }
    /* A-part */
    for (j=0; j<countA; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = *av++;
    }
    /* B-part, larger col index */
    for (j=jB; j<countB; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = *bv++;
    }
    irow++;
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MUMPS_Base"
/*
  Converts a *MUMPS matrix back to its underlying PETSc type by restoring the
  method pointers saved in Mat_MUMPS at conversion time.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
  void           (*f)(void);

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* restore the saved methods of the wrapped type */
  B->ops->duplicate              = mumps->MatDuplicate;
  B->ops->view                   = mumps->MatView;
  B->ops->assemblyend            = mumps->MatAssemblyEnd;
  B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
  B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
  B->ops->destroy                = mumps->MatDestroy;

  /* put back original composed preallocation function */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
  }

  /* remove the MUMPS conversion hooks */
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
/*
  Destroys a *MUMPS matrix: terminates the MUMPS instance and frees its work
  data (if a factorization happened), then dispatches to the type-specific
  destroy (specialdestroy) and finally the wrapped type's destroy.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);

  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    if (size > 1){
      /* parallel runs own extra scatters/vectors for the centralized rhs and distributed solution */
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
      ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    lu->id.job=JOB_END;  /* tell MUMPS to release its internal data */
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  /* save the pointer: specialdestroy (below) converts A back and may free lu */
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_AIJMUMPS"
/* specialdestroy for AIJ-flavored MUMPS matrices: convert back to (seq|mpi)aij */
PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SBAIJMUMPS"
/* specialdestroy for SBAIJ-flavored MUMPS matrices: convert back to (seq|mpi)sbaij */
PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MUMPS"
/*
  Solves A x = b using the factors previously computed by MatFactorNumeric_MUMPS
  (MUMPS job 3). In parallel the rhs is gathered onto the host and the
  distributed solution is scattered back into x.
*/
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;
  IS             is_iden,is_petsc;
  VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  lu->id.nrhs = 1;
  x_seq = lu->b_seq;
  if (lu->size > 1){
    /* MUMPS only supports centralized rhs.
Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    /* sequential: solve in place, x starts as a copy of b */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }
  if (lu->size == 1){
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  } else if (!lu->myid){
    ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
  }

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      /* single allocation holds lsol_loc scalars followed by lsol_loc indices */
      ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
      lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
      lu->id.lsol_loc = lsol_loc;
      lu->id.sol_loc  = (F_DOUBLE *)sol_loc; /* cast to MUMPS's scalar type alias */
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
    }
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = 3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (!lu->nSolve){ /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<lu->id.lsol_loc; i++){
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
    ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
  }
  lu->nSolve++;
  PetscFunctionReturn(0);
}

/*
 input:
   F:        numeric factor
 output:
   nneg:     total number of negative pivots
   nzero:    0
   npos:     (global dimension of F) - nneg
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && lu->id.ICNTL(13) != 1){
    SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 368 } 369 if (nneg){ 370 if (!lu->myid){ 371 *nneg = lu->id.INFOG(12); 372 } 373 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 374 } 375 if (nzero) *nzero = 0; 376 if (npos) *npos = F->rmap.N - (*nneg); 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatFactorNumeric_MUMPS" 382 PetscErrorCode MatFactorNumeric_MUMPS(Mat A,MatFactorInfo *info,Mat *F) 383 { 384 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 385 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 386 PetscErrorCode ierr; 387 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 388 PetscTruth valOnly,flg; 389 Mat F_diag; 390 391 PetscFunctionBegin; 392 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 393 (*F)->ops->solve = MatSolve_MUMPS; 394 395 /* Initialize a MUMPS instance */ 396 ierr = MPI_Comm_rank(A->comm, &lu->myid); 397 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 398 lua->myid = lu->myid; lua->size = lu->size; 399 lu->id.job = JOB_INIT; 400 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 401 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 402 403 /* Set mumps options */ 404 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 405 lu->id.par=1; /* host participates factorizaton and solve */ 406 lu->id.sym=lu->sym; 407 if (lu->sym == 2){ 408 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 409 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 410 } 411 #if defined(PETSC_USE_COMPLEX) 412 zmumps_c(&lu->id); 413 #else 414 dmumps_c(&lu->id); 415 #endif 416 417 if (lu->size == 1){ 418 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 419 } else { 420 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 421 } 422 423 icntl=-1; 424 lu->id.ICNTL(4) = 0; /* level of printing; overwrite mumps default ICNTL(4)=2 */ 
425 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 426 if ((flg && icntl > 0) || PetscLogPrintInfo) { 427 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 428 } else { /* no output */ 429 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 430 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 431 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 432 } 433 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 434 icntl=-1; 435 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 436 if (flg) { 437 if (icntl== 1){ 438 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 439 } else { 440 lu->id.ICNTL(7) = icntl; 441 } 442 } 443 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 
1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);

    ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);CHKERRQ(ierr);
    PetscOptionsEnd();
  }

  /* define matrix A in the input format selected above (ICNTL(18)) */
  switch (lu->id.ICNTL(18)){
  case 0:  /* centralized assembled matrix input (size=1) */
    if (!lu->myid) {
      if (lua->isAIJ){
        Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data;
        nz = aa->nz;
        ai = aa->i; aj = aa->j; lu->val = aa->a;
      } else {
        Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
        nz = aa->nz;
        ai = aa->i; aj = aa->j; lu->val = aa->a;
      }
      if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
        nz = 0;
        for (i=0; i<M; i++){
          rnz = ai[i+1] - ai[i];
          while (rnz--) {  /* Fortran row/col index! */
            lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
          }
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
      valOnly = PETSC_FALSE;
    } else {
      valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
    }
    /* shift=1: MUMPS wants Fortran-style (1-based) indices */
    ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
    break;
  default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
  }

  /* analysis phase */
  /*----------------*/
  if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
    lu->id.job = 1;

    lu->id.n = M;
    switch (lu->id.ICNTL(18)){
    case 0:  /* centralized assembled matrix input */
      if (!lu->myid) {
        lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
        if (lu->id.ICNTL(6)>1){
          /* prescaling needs the numerical values during analysis */
#if defined(PETSC_USE_COMPLEX)
          lu->id.a = (mumps_double_complex*)lu->val;
#else
          lu->id.a = lu->val;
#endif
        }
      }
      break;
    case 3:  /* distributed assembled matrix input (size>1) */
      lu->id.nz_loc = nnz;
      lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
      if (lu->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
        lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
        lu->id.a_loc = lu->val;
#endif
      }
      /* MUMPS only supports centralized rhs.
Create scatter scat_rhs for repeated use in MatSolve() */
      IS  is_iden;
      Vec b;
      if (!lu->myid){
        /* host receives the entire rhs */
        ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
      } else {
        /* other ranks contribute but hold nothing */
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
      }
      /* template parallel vector with the layout of the rhs */
      ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
      ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
      ierr = VecSetFromOptions(b);CHKERRQ(ierr);

      ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = VecDestroy(b);CHKERRQ(ierr);
      break;
    }
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    if (lu->id.INFOG(1) < 0) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
    }
  }

  /* numerical factorization phase */
  /*-------------------------------*/
  lu->id.job = 2;
  if(!lu->id.ICNTL(18)) {
    /* centralized input: values live on the host only */
    if (!lu->myid) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a = (mumps_double_complex*)lu->val;
#else
      lu->id.a = lu->val;
#endif
    }
  } else {
#if defined(PETSC_USE_COMPLEX)
    lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
    lu->id.a_loc = lu->val;
#endif
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    if (lu->id.INFO(1) == -13) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
    } else {
      SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
    }
  }

  /* NOTE(review): condition tests ICNTL(16) but the value printed is INFOG(16) — verify which is intended */
  if (!lu->myid && lu->id.ICNTL(16) > 0){
    SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
  }

  if (lu->size > 1){
    /* mark the diagonal block of the parallel factor as assembled */
    if ((*F)->factor == FACTOR_LU){
      F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
    } else {
      F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
    }
    F_diag->assembled = PETSC_TRUE;
    if (lu->nSolve){
      /* refactorization: the old solution scatter/vector are stale */
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
    }
  }
  (*F)->assembled  = PETSC_TRUE;
  lu->matstruc     = SAME_NONZERO_PATTERN;
  lu->CleanUpMUMPS = PETSC_TRUE;
  lu->nSolve       = 0;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
/* Creates the (empty) factor matrix F; the real work happens in MatFactorNumeric_MUMPS */
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
  Mat            B;
  Mat_MUMPS      *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  B->factor               = FACTOR_LU;
  lu                      = (Mat_MUMPS*)B->spptr;
  lu->sym                 = 0;  /* MUMPS SYM=0: unsymmetric */
  lu->matstruc            = DIFFERENT_NONZERO_PATTERN;

  *F = B;
  PetscFunctionReturn(0);
}

/* Note the Petsc r permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
/* Creates the (empty) Cholesky factor matrix F; the real work happens in MatFactorNumeric_MUMPS */
PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
  Mat            B;
  Mat_MUMPS      *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr =
MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
  B->factor                     = FACTOR_CHOLESKY;
  lu                            = (Mat_MUMPS*)B->spptr;
  lu->sym                       = 2;  /* MUMPS SYM=2: general symmetric */
  lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;

  *F = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorInfo_MUMPS"
/*
  Prints the MUMPS control parameters and statistics of the last factorization
  to the viewer (used by -ksp_view / MatView with ASCII_INFO format).
*/
PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scalling strategy): %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
  if (!lu->myid && lu->id.ICNTL(11)>0) {
    /* error-analysis statistics are only available on the host */
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
  }
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
  /* ICNTL(15-17) not used */
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Shur complement info): %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
  ierr =
PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);

  ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",lu->id.CNTL(4));CHKERRQ(ierr);

  /* information local to each processor */
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"  [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);  /* NOTE(review): flush results are unchecked throughout */
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"  [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"  [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  /*
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"  [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  */

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"  [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"  [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"  [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);

  if (!lu->myid){ /* information from the host */
    ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);

    ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MUMPS"
/* Views via the wrapped type's view, then appends MUMPS parameters for ASCII_INFO format */
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);

  PetscFunctionBegin;
  ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO){
      ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
/* Runs the wrapped type's assemblyend, then re-installs the MUMPS factorization hooks */
PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  ierr =
(*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 798 799 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 800 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 801 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 802 PetscFunctionReturn(0); 803 } 804 805 EXTERN_C_BEGIN 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 808 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 809 { 810 PetscErrorCode ierr; 811 PetscMPIInt size; 812 MPI_Comm comm; 813 Mat B=*newmat; 814 Mat_MUMPS *mumps; 815 816 PetscFunctionBegin; 817 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 818 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 819 820 if (reuse == MAT_INITIAL_MATRIX) { 821 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 822 /* A may have special container that is not duplicated, 823 e.g., A is obtainted from MatMatMult(,&A). Save B->ops instead */ 824 mumps->MatDuplicate = B->ops->duplicate; 825 mumps->MatView = B->ops->view; 826 mumps->MatAssemblyEnd = B->ops->assemblyend; 827 mumps->MatLUFactorSymbolic = B->ops->lufactorsymbolic; 828 mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic; 829 mumps->MatDestroy = B->ops->destroy; 830 } else { 831 mumps->MatDuplicate = A->ops->duplicate; 832 mumps->MatView = A->ops->view; 833 mumps->MatAssemblyEnd = A->ops->assemblyend; 834 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 835 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 836 mumps->MatDestroy = A->ops->destroy; 837 } 838 mumps->specialdestroy = MatDestroy_AIJMUMPS; 839 mumps->CleanUpMUMPS = PETSC_FALSE; 840 mumps->isAIJ = PETSC_TRUE; 841 842 B->spptr = (void*)mumps; 843 B->ops->duplicate = MatDuplicate_MUMPS; 844 B->ops->view = MatView_MUMPS; 845 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 846 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 847 B->ops->destroy = MatDestroy_MUMPS; 848 849 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 850 if (size == 1) { 851 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 852 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 854 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 855 } else { 856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 857 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 859 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 860 } 861 862 ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr); 863 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 864 *newmat = B; 865 PetscFunctionReturn(0); 866 } 867 EXTERN_C_END 868 869 /*MC 870 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 871 and sequential matrices via the external package MUMPS. 872 873 If MUMPS is installed (see the manual for instructions 874 on how to declare the existence of external packages), 875 a matrix type can be constructed which invokes MUMPS solvers. 876 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 877 878 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 879 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 880 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 881 for communicators controlling multiple processes. It is recommended that you call both of 882 the above preallocation routines for simplicity. 
One can also call MatConvert for an inplace 883 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 884 without data copy. 885 886 Options Database Keys: 887 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 888 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 889 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 890 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 891 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 892 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 893 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 894 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 895 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 896 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 897 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 898 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 899 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 900 . 
-mat_mumps_cntl_2 <tol> - stopping criterion for refinement 901 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 902 903 Level: beginner 904 905 .seealso: MATSBAIJMUMPS 906 M*/ 907 908 EXTERN_C_BEGIN 909 #undef __FUNCT__ 910 #define __FUNCT__ "MatCreate_AIJMUMPS" 911 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 912 { 913 PetscErrorCode ierr; 914 PetscMPIInt size; 915 916 PetscFunctionBegin; 917 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 918 if (size == 1) { 919 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 920 } else { 921 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 922 /* 923 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 924 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 925 */ 926 } 927 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 EXTERN_C_END 931 932 #undef __FUNCT__ 933 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 934 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 935 { 936 PetscErrorCode ierr; 937 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 938 939 PetscFunctionBegin; 940 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 941 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 942 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 943 PetscFunctionReturn(0); 944 } 945 946 EXTERN_C_BEGIN 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 949 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 950 { 951 Mat A; 952 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 953 PetscErrorCode ierr; 954 955 PetscFunctionBegin; 956 /* 957 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 958 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. 
I would 959 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 960 block size info so that PETSc can determine the local size properly. The block size info is set 961 in the preallocation routine. 962 */ 963 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 964 A = ((Mat_MPISBAIJ *)B->data)->A; 965 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 EXTERN_C_END 969 970 EXTERN_C_BEGIN 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 973 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 974 { 975 PetscErrorCode ierr; 976 PetscMPIInt size; 977 MPI_Comm comm; 978 Mat B=*newmat; 979 Mat_MUMPS *mumps; 980 void (*f)(void); 981 982 PetscFunctionBegin; 983 if (reuse == MAT_INITIAL_MATRIX) { 984 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 985 } 986 987 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 988 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 989 990 mumps->MatDuplicate = A->ops->duplicate; 991 mumps->MatView = A->ops->view; 992 mumps->MatAssemblyEnd = A->ops->assemblyend; 993 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 994 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 995 mumps->MatDestroy = A->ops->destroy; 996 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 997 mumps->CleanUpMUMPS = PETSC_FALSE; 998 mumps->isAIJ = PETSC_FALSE; 999 1000 B->spptr = (void*)mumps; 1001 B->ops->duplicate = MatDuplicate_MUMPS; 1002 B->ops->view = MatView_MUMPS; 1003 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 1004 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 1005 B->ops->destroy = MatDestroy_MUMPS; 1006 1007 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1008 if (size == 1) { 1009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 
1010 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1011 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 1012 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1013 } else { 1014 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 1015 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 1016 if (f) { /* This case should always be true when this routine is called */ 1017 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 1018 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1019 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 1020 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 1021 } 1022 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 1023 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 1025 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1026 } 1027 1028 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 1029 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 1030 *newmat = B; 1031 PetscFunctionReturn(0); 1032 } 1033 EXTERN_C_END 1034 1035 #undef __FUNCT__ 1036 #define __FUNCT__ "MatDuplicate_MUMPS" 1037 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 1038 PetscErrorCode ierr; 1039 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 1040 1041 PetscFunctionBegin; 1042 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 1043 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 /*MC 1048 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 1049 distributed 
and sequential matrices via the external package MUMPS. 1050 1051 If MUMPS is installed (see the manual for instructions 1052 on how to declare the existence of external packages), 1053 a matrix type can be constructed which invokes MUMPS solvers. 1054 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 1055 1056 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 1057 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 1058 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 1059 for communicators controlling multiple processes. It is recommended that you call both of 1060 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 1061 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 1062 without data copy. 1063 1064 Options Database Keys: 1065 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 1066 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 1067 . -mat_mumps_icntl_4 <0,...,4> - print level 1068 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 1069 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 1070 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 1071 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 1072 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 1073 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 1074 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 1075 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 1076 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1077 . 
-mat_mumps_cntl_1 <delta> - relative pivoting threshold 1078 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 1079 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 1080 1081 Level: beginner 1082 1083 .seealso: MATAIJMUMPS 1084 M*/ 1085 1086 EXTERN_C_BEGIN 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1089 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1090 { 1091 PetscErrorCode ierr; 1092 PetscMPIInt size; 1093 1094 PetscFunctionBegin; 1095 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1096 if (size == 1) { 1097 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1098 } else { 1099 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1100 } 1101 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 EXTERN_C_END 1105