1 2 /* 3 Provides an interface to the MUMPS_4.3.1 sparse solver 4 */ 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zmumps_c.h" 13 #else 14 #include "dmumps_c.h" 15 #endif 16 EXTERN_C_END 17 #define JOB_INIT -1 18 #define JOB_END -2 19 /* macros s.t. indices match MUMPS documentation */ 20 #define ICNTL(I) icntl[(I)-1] 21 #define CNTL(I) cntl[(I)-1] 22 #define INFOG(I) infog[(I)-1] 23 #define INFO(I) info[(I)-1] 24 #define RINFOG(I) rinfog[(I)-1] 25 #define RINFO(I) rinfo[(I)-1] 26 27 typedef struct { 28 #if defined(PETSC_USE_COMPLEX) 29 ZMUMPS_STRUC_C id; 30 #else 31 DMUMPS_STRUC_C id; 32 #endif 33 MatStructure matstruc; 34 int myid,size,*irn,*jcn,sym; 35 PetscScalar *val; 36 MPI_Comm comm_mumps; 37 38 PetscTruth isAIJ,CleanUpMUMPS; 39 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 40 PetscErrorCode (*MatView)(Mat,PetscViewer); 41 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 42 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 43 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 44 PetscErrorCode (*MatDestroy)(Mat); 45 PetscErrorCode (*specialdestroy)(Mat); 46 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 47 } Mat_MUMPS; 48 49 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 50 EXTERN_C_BEGIN 51 PetscErrorCode MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,Mat*); 52 EXTERN_C_END 53 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 54 /* 55 input: 56 A - matrix in mpiaij or mpisbaij (bs=1) format 57 shift - 0: C style output triple; 1: Fortran style output triple. 58 valOnly - FALSE: spaces are allocated and values are set for the triple 59 TRUE: only the values in v array are updated 60 output: 61 nnz - dim of r, c, and v (number of local nonzero entries of A) 62 r, c, v - row and col index, matrix values (matrix triples) 63 */ 64 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 65 int *ai, *aj, *bi, *bj, rstart,nz, *garray; 66 PetscErrorCode ierr; 67 int i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol; 68 int *row,*col; 69 PetscScalar *av, *bv,*val; 70 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 71 72 PetscFunctionBegin; 73 if (mumps->isAIJ){ 74 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 75 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 76 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 77 nz = aa->nz + bb->nz; 78 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 79 garray = mat->garray; 80 av=aa->a; bv=bb->a; 81 82 } else { 83 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 84 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 85 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 86 if (mat->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", mat->bs); 87 nz = aa->nz + bb->nz; 88 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 89 garray = mat->garray; 90 av=aa->a; bv=bb->a; 91 } 92 93 if (!valOnly){ 94 ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr); 95 ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr); 96 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 97 *r = row; *c = col; *v = val; 98 } else { 99 row = *r; col = *c; val = *v; 100 } 101 *nnz = nz; 102 103 jj = 0; irow = rstart; 104 for ( i=0; i<m; i++ ) { 105 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 106 countA = ai[i+1] - ai[i]; 107 countB = bi[i+1] - bi[i]; 108 bjj = bj + bi[i]; 109 110 /* get jB, the starting local col index for the 2nd B-part */ 111 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 112 j=-1; 113 do { 114 j++; 115 if (j == countB) break; 116 jcol = garray[bjj[j]]; 117 } while (jcol < colA_start); 118 jB = j; 119 120 /* B-part, smaller col index */ 121 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 122 for (j=0; j<jB; j++){ 123 jcol = garray[bjj[j]]; 124 if (!valOnly){ 125 row[jj] = irow + shift; col[jj] = jcol + shift; 126 127 } 128 val[jj++] = *bv++; 129 } 130 /* A-part */ 131 for (j=0; j<countA; j++){ 132 if (!valOnly){ 133 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 134 } 135 val[jj++] = *av++; 136 } 137 /* B-part, larger col index */ 138 for (j=jB; j<countB; j++){ 139 if (!valOnly){ 140 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 141 } 142 val[jj++] = *bv++; 143 } 144 irow++; 145 } 146 147 PetscFunctionReturn(0); 148 } 149 150 EXTERN_C_BEGIN 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatConvert_MUMPS_Base" 153 PetscErrorCode MatConvert_MUMPS_Base(Mat A,const MatType type,Mat *newmat) { 154 PetscErrorCode ierr; 155 Mat B=*newmat; 156 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 157 158 PetscFunctionBegin; 159 if (B != A) { 160 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 161 } 162 B->ops->duplicate = mumps->MatDuplicate; 163 B->ops->view = mumps->MatView; 164 B->ops->assemblyend = mumps->MatAssemblyEnd; 165 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 166 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 167 B->ops->destroy = mumps->MatDestroy; 168 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 169 ierr = PetscFree(mumps);CHKERRQ(ierr); 170 *newmat = B; 171 PetscFunctionReturn(0); 172 } 173 EXTERN_C_END 174 175 #undef __FUNCT__ 176 #define __FUNCT__ "MatDestroy_MUMPS" 177 PetscErrorCode MatDestroy_MUMPS(Mat A) 178 { 179 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 180 PetscErrorCode ierr; 181 int size=lu->size; 182 PetscErrorCode (*specialdestroy)(Mat); 183 PetscFunctionBegin; 184 if (lu->CleanUpMUMPS) { 185 /* Terminate instance, deallocate memories */ 186 lu->id.job=JOB_END; 187 #if defined(PETSC_USE_COMPLEX) 188 zmumps_c(&lu->id); 189 #else 190 dmumps_c(&lu->id); 191 #endif 192 if (lu->irn) { 193 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 194 } 195 if (lu->jcn) { 196 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 197 } 198 if (size>1 && lu->val) { 199 ierr = PetscFree(lu->val);CHKERRQ(ierr); 200 } 201 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 202 } 203 specialdestroy = lu->specialdestroy; 204 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 205 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatDestroy_AIJMUMPS" 211 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 212 { 213 PetscErrorCode ierr; 214 int size; 215 216 PetscFunctionBegin; 217 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 218 if (size==1) { 219 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 220 } else { 221 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 222 } 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 228 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 229 { 230 PetscErrorCode ierr; 231 int size; 232 233 PetscFunctionBegin; 234 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 235 if (size==1) { 236 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,&A);CHKERRQ(ierr); 237 } else { 238 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,&A);CHKERRQ(ierr); 239 } 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatFactorInfo_MUMPS" 245 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 246 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 247 PetscErrorCode ierr; 248 249 PetscFunctionBegin; 250 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 251 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 252 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 253 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 254 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 255 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 256 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 257 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 258 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 259 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 260 if (!lu->myid && lu->id.ICNTL(11)>0) { 261 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 262 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 263 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 264 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 265 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 266 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 267 268 } 269 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 270 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 273 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 274 275 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 276 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 278 279 /* infomation local to each processor */ 280 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 281 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 282 ierr = PetscSynchronizedFlush(A->comm); 283 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 284 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 285 ierr = PetscSynchronizedFlush(A->comm); 286 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 287 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 288 ierr = PetscSynchronizedFlush(A->comm); 289 290 if (!lu->myid){ /* information from the host */ 291 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 292 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 293 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 294 295 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 296 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 297 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 298 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 299 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 300 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 301 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 302 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 303 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 304 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 305 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 306 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 307 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 308 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 309 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 310 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 312 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 313 } 314 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatView_AIJMUMPS" 320 PetscErrorCode MatView_AIJMUMPS(Mat A,PetscViewer viewer) { 321 PetscErrorCode ierr; 322 PetscTruth iascii; 323 PetscViewerFormat format; 324 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 325 326 PetscFunctionBegin; 327 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 328 329 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 330 if (iascii) { 331 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 332 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 333 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 334 } 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatSolve_AIJMUMPS" 341 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 342 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 343 PetscScalar *array; 344 Vec x_seq; 345 IS iden; 346 VecScatter scat; 347 PetscErrorCode ierr; 348 349 PetscFunctionBegin; 350 if (lu->size > 1){ 351 if (!lu->myid){ 352 ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr); 353 ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr); 354 } else { 355 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 356 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 357 } 358 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 359 ierr = ISDestroy(iden);CHKERRQ(ierr); 360 361 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 362 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 363 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 364 } else { /* size == 1 */ 365 ierr = VecCopy(b,x);CHKERRQ(ierr); 366 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 367 } 368 if (!lu->myid) { /* define rhs on the host */ 369 #if defined(PETSC_USE_COMPLEX) 370 lu->id.rhs = (mumps_double_complex*)array; 371 #else 372 lu->id.rhs = array; 373 #endif 374 } 375 376 /* solve phase */ 377 lu->id.job=3; 378 #if defined(PETSC_USE_COMPLEX) 379 zmumps_c(&lu->id); 380 #else 381 dmumps_c(&lu->id); 382 #endif 383 if (lu->id.INFOG(1) < 0) { 384 SETERRQ1(1,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 385 } 386 387 /* convert mumps solution x_seq to petsc mpi x */ 388 if (lu->size > 1) { 389 if (!lu->myid){ 390 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 391 } 392 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 393 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 394 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 395 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 396 } else { 397 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 398 } 399 400 PetscFunctionReturn(0); 401 } 402 403 /* 404 input: 405 F: numeric factor 406 output: 407 nneg: total number of negative pivots 408 nzero: 0 409 npos: (global dimension of F) - nneg 410 */ 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 414 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 415 { 416 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 417 PetscErrorCode ierr; 418 int size; 419 420 PetscFunctionBegin; 421 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 422 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 423 if (size > 1 && lu->id.ICNTL(13) != 1){ 424 SETERRQ1(1,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 425 } 426 if (nneg){ 427 if (!lu->myid){ 428 *nneg = lu->id.INFOG(12); 429 } 430 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 431 } 432 if (nzero) *nzero = 0; 433 if (npos) *npos = F->M - (*nneg); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatFactorNumeric_MPIAIJMUMPS" 439 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,Mat *F) { 440 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 441 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 442 PetscErrorCode ierr; 443 int rnz,nnz,nz,i,M=A->M,*ai,*aj,icntl; 444 PetscTruth valOnly,flg; 445 446 PetscFunctionBegin; 447 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 448 (*F)->ops->solve = MatSolve_AIJMUMPS; 449 450 /* Initialize a MUMPS instance */ 451 ierr = MPI_Comm_rank(A->comm, &lu->myid); 452 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 453 lua->myid = lu->myid; lua->size = lu->size; 454 lu->id.job = JOB_INIT; 455 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 456 lu->id.comm_fortran = lu->comm_mumps; 457 458 /* Set mumps options */ 459 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 460 lu->id.par=1; /* host participates factorizaton and solve */ 461 lu->id.sym=lu->sym; 462 if (lu->sym == 2){ 463 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 464 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 465 } 466 #if defined(PETSC_USE_COMPLEX) 467 zmumps_c(&lu->id); 468 #else 469 dmumps_c(&lu->id); 470 #endif 471 472 if (lu->size == 1){ 473 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 474 } else { 475 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 476 } 477 478 icntl=-1; 479 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 480 if (flg && icntl > 0) { 481 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 482 } else { /* no output */ 483 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 484 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 485 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 486 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 487 } 488 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 489 icntl=-1; 490 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 491 if (flg) { 492 if (icntl== 1){ 493 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 494 } else { 495 lu->id.ICNTL(7) = icntl; 496 } 497 } 498 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 499 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 500 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 501 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 502 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 503 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 504 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 505 506 /* 507 ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr); 508 if (flg){ 509 if (icntl >-1 && icntl <3 ){ 510 if (lu->myid==0) lu->id.ICNTL(16) = icntl; 511 } else { 512 SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl); 513 } 514 } 515 */ 516 517 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 518 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 519 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 520 PetscOptionsEnd(); 521 } 522 523 /* define matrix A */ 524 switch (lu->id.ICNTL(18)){ 525 case 0: /* centralized assembled matrix input (size=1) */ 526 if (!lu->myid) { 527 if (lua->isAIJ){ 528 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 529 nz = aa->nz; 530 ai = aa->i; aj = aa->j; lu->val = aa->a; 531 } else { 532 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 533 nz = aa->nz; 534 ai = aa->i; aj = aa->j; lu->val = aa->a; 535 } 536 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 537 ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr); 538 ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr); 539 nz = 0; 540 for (i=0; i<M; i++){ 541 rnz = ai[i+1] - ai[i]; 542 while (rnz--) { /* Fortran row/col index! */ 543 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 544 } 545 } 546 } 547 } 548 break; 549 case 3: /* distributed assembled matrix input (size>1) */ 550 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 551 valOnly = PETSC_FALSE; 552 } else { 553 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 554 } 555 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 556 break; 557 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 558 } 559 560 /* analysis phase */ 561 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 562 lu->id.n = M; 563 switch (lu->id.ICNTL(18)){ 564 case 0: /* centralized assembled matrix input */ 565 if (!lu->myid) { 566 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 567 if (lu->id.ICNTL(6)>1){ 568 #if defined(PETSC_USE_COMPLEX) 569 lu->id.a = (mumps_double_complex*)lu->val; 570 #else 571 lu->id.a = lu->val; 572 #endif 573 } 574 } 575 break; 576 case 3: /* distributed assembled matrix input (size>1) */ 577 lu->id.nz_loc = nnz; 578 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 579 if (lu->id.ICNTL(6)>1) { 580 #if defined(PETSC_USE_COMPLEX) 581 lu->id.a_loc = (mumps_double_complex*)lu->val; 582 #else 583 lu->id.a_loc = lu->val; 584 #endif 585 } 586 break; 587 } 588 lu->id.job=1; 589 #if defined(PETSC_USE_COMPLEX) 590 zmumps_c(&lu->id); 591 #else 592 dmumps_c(&lu->id); 593 #endif 594 if (lu->id.INFOG(1) < 0) { 595 SETERRQ1(1,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 596 } 597 } 598 599 /* numerical factorization phase */ 600 if(!lu->id.ICNTL(18)) { 601 if (!lu->myid) { 602 #if defined(PETSC_USE_COMPLEX) 603 lu->id.a = (mumps_double_complex*)lu->val; 604 #else 605 lu->id.a = lu->val; 606 #endif 607 } 608 } else { 609 #if defined(PETSC_USE_COMPLEX) 610 lu->id.a_loc = (mumps_double_complex*)lu->val; 611 #else 612 lu->id.a_loc = lu->val; 613 #endif 614 } 615 lu->id.job=2; 616 #if defined(PETSC_USE_COMPLEX) 617 zmumps_c(&lu->id); 618 #else 619 dmumps_c(&lu->id); 620 #endif 621 if (lu->id.INFOG(1) < 0) { 622 SETERRQ2(1,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 623 } 624 625 if (lu->myid==0 && lu->id.ICNTL(16) > 0){ 626 SETERRQ1(1," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 627 } 628 629 (*F)->assembled = PETSC_TRUE; 630 lu->matstruc = SAME_NONZERO_PATTERN; 631 lu->CleanUpMUMPS = PETSC_TRUE; 632 PetscFunctionReturn(0); 633 } 634 635 /* Note the Petsc r and c permutations are ignored */ 636 #undef __FUNCT__ 637 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 638 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 639 Mat B; 640 Mat_MUMPS *lu; 641 PetscErrorCode ierr; 642 643 PetscFunctionBegin; 644 645 /* Create the factorization matrix */ 646 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 647 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 648 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 649 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 650 651 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 652 B->factor = FACTOR_LU; 653 lu = (Mat_MUMPS*)B->spptr; 654 lu->sym = 0; 655 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 656 657 *F = B; 658 PetscFunctionReturn(0); 659 } 660 661 /* Note the Petsc r permutation is ignored */ 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 664 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 665 Mat B; 666 Mat_MUMPS *lu; 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 671 /* Create the factorization matrix */ 672 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 673 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 674 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 675 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 676 677 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 678 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 679 B->factor = FACTOR_CHOLESKY; 680 lu = (Mat_MUMPS*)B->spptr; 681 lu->sym = 2; 682 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 683 684 *F = B; 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 690 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 691 PetscErrorCode ierr; 692 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 693 694 PetscFunctionBegin; 695 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 696 697 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 698 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 699 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 700 PetscFunctionReturn(0); 701 } 702 703 EXTERN_C_BEGIN 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 706 PetscErrorCode MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,Mat *newmat)\ 707 { 708 PetscErrorCode ierr; 709 int size; 710 MPI_Comm comm; 711 Mat B=*newmat; 712 Mat_MUMPS *mumps; 713 714 PetscFunctionBegin; 715 if (B != A) { 716 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 717 } 718 719 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 720 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 721 722 mumps->MatDuplicate = A->ops->duplicate; 723 mumps->MatView = A->ops->view; 724 mumps->MatAssemblyEnd = A->ops->assemblyend; 725 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 726 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 727 mumps->MatDestroy = A->ops->destroy; 728 mumps->specialdestroy = MatDestroy_AIJMUMPS; 729 mumps->CleanUpMUMPS = PETSC_FALSE; 730 mumps->isAIJ = PETSC_TRUE; 731 732 B->spptr = (void*)mumps; 733 B->ops->duplicate = MatDuplicate_MUMPS; 734 B->ops->view = MatView_AIJMUMPS; 735 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 736 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 737 B->ops->destroy = MatDestroy_MUMPS; 738 739 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 740 if (size == 1) { 741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 742 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 744 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 745 } else { 746 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 747 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 749 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 750 } 751 752 PetscLogInfo(0,"Using MUMPS for LU factorization and solves."); 753 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 754 *newmat = B; 755 PetscFunctionReturn(0); 756 } 757 EXTERN_C_END 758 759 /*MC 760 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 761 and sequential matrices via the external package MUMPS. 762 763 If MUMPS is installed (see the manual for instructions 764 on how to declare the existence of external packages), 765 a matrix type can be constructed which invokes MUMPS solvers. 766 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 767 This matrix type is only supported for double precision real. 768 769 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 770 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 771 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 772 for communicators controlling multiple processes. It is recommended that you call both of 773 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 774 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 775 without data copy. 776 777 Options Database Keys: 778 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 779 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 780 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 781 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 782 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 783 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 784 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 785 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 786 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 787 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 788 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 789 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 790 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 791 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 792 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 793 794 Level: beginner 795 796 .seealso: MATSBAIJMUMPS 797 M*/ 798 799 EXTERN_C_BEGIN 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatCreate_AIJMUMPS" 802 PetscErrorCode MatCreate_AIJMUMPS(Mat A) 803 { 804 PetscErrorCode ierr; 805 int size; 806 Mat A_diag; 807 MPI_Comm comm; 808 809 PetscFunctionBegin; 810 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 811 /* and AIJMUMPS types */ 812 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 813 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 814 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 815 if (size == 1) { 816 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 817 } else { 818 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 819 A_diag = ((Mat_MPIAIJ *)A->data)->A; 820 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,&A_diag);CHKERRQ(ierr); 821 } 822 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,&A);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 EXTERN_C_END 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 829 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 830 { 831 PetscErrorCode ierr; 832 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 833 834 PetscFunctionBegin; 835 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 836 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 837 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 838 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 839 PetscFunctionReturn(0); 840 } 841 842 EXTERN_C_BEGIN 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 845 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 846 { 847 Mat A; 848 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 849 PetscErrorCode ierr; 850 851 PetscFunctionBegin; 852 /* 853 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 854 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 855 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 856 block size info so that PETSc can determine the local size properly. The block size info is set 857 in the preallocation routine. 858 */ 859 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 860 A = ((Mat_MPISBAIJ *)B->data)->A; 861 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 EXTERN_C_END 865 866 EXTERN_C_BEGIN 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 869 PetscErrorCode MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,Mat *newmat) 870 { 871 PetscErrorCode ierr; 872 int size; 873 MPI_Comm comm; 874 Mat B=*newmat; 875 Mat_MUMPS *mumps; 876 void (*f)(void); 877 878 PetscFunctionBegin; 879 if (B != A) { 880 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 881 } 882 883 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 884 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 885 886 mumps->MatDuplicate = A->ops->duplicate; 887 mumps->MatView = A->ops->view; 888 mumps->MatAssemblyEnd = A->ops->assemblyend; 889 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 890 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 891 mumps->MatDestroy = A->ops->destroy; 892 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 893 mumps->CleanUpMUMPS = PETSC_FALSE; 894 mumps->isAIJ = PETSC_FALSE; 895 896 B->spptr = (void*)mumps; 897 B->ops->duplicate = MatDuplicate_MUMPS; 898 B->ops->view = MatView_AIJMUMPS; 899 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 900 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 901 B->ops->destroy = MatDestroy_MUMPS; 902 903 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 904 if (size == 1) { 905 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 906 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 907 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 908 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 909 } else { 910 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 911 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 912 if (f) { 913 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 914 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 915 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 916 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 917 } 918 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 919 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 920 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 921 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 922 } 923 924 PetscLogInfo(0,"Using MUMPS for Cholesky factorization and solves."); 925 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 926 *newmat = B; 927 PetscFunctionReturn(0); 928 } 929 EXTERN_C_END 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatDuplicate_MUMPS" 933 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 934 PetscErrorCode ierr; 935 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 936 937 PetscFunctionBegin; 938 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 939 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /*MC 944 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 945 distributed and sequential matrices via the external package MUMPS. 946 947 If MUMPS is installed (see the manual for instructions 948 on how to declare the existence of external packages), 949 a matrix type can be constructed which invokes MUMPS solvers. 950 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 951 This matrix type is only supported for double precision real. 952 953 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 954 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 955 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 956 for communicators controlling multiple processes. It is recommended that you call both of 957 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 958 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 959 without data copy. 960 961 Options Database Keys: 962 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 963 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 964 . -mat_mumps_icntl_4 <0,...,4> - print level 965 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 966 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 967 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 968 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 969 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 970 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 971 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 972 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 973 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 974 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 975 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 976 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 977 978 Level: beginner 979 980 .seealso: MATAIJMUMPS 981 M*/ 982 983 EXTERN_C_BEGIN 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 986 PetscErrorCode MatCreate_SBAIJMUMPS(Mat A) 987 { 988 PetscErrorCode ierr; 989 int size; 990 991 PetscFunctionBegin; 992 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 993 /* and SBAIJMUMPS types */ 994 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 995 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 996 if (size == 1) { 997 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 998 } else { 999 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1000 } 1001 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 EXTERN_C_END 1005