1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS_4.3.1 sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 int myid,size,*irn,*jcn,sym; 36 PetscScalar *val; 37 MPI_Comm comm_mumps; 38 39 PetscTruth isAIJ,CleanUpMUMPS; 40 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 41 PetscErrorCode (*MatView)(Mat,PetscViewer); 42 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 43 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 44 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatDestroy)(Mat); 46 PetscErrorCode (*specialdestroy)(Mat); 47 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 48 } Mat_MUMPS; 49 50 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 51 EXTERN_C_BEGIN 52 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*); 53 EXTERN_C_END 54 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 55 /* 56 input: 57 A - matrix in mpiaij or mpisbaij (bs=1) format 58 shift - 0: C style output triple; 1: Fortran style output triple. 59 valOnly - FALSE: spaces are allocated and values are set for the triple 60 TRUE: only the values in v array are updated 61 output: 62 nnz - dim of r, c, and v (number of local nonzero entries of A) 63 r, c, v - row and col index, matrix values (matrix triples) 64 */ 65 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 66 int *ai, *aj, *bi, *bj, rstart,nz, *garray; 67 PetscErrorCode ierr; 68 int i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol; 69 int *row,*col; 70 PetscScalar *av, *bv,*val; 71 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 72 73 PetscFunctionBegin; 74 if (mumps->isAIJ){ 75 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 76 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 77 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 78 nz = aa->nz + bb->nz; 79 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 80 garray = mat->garray; 81 av=aa->a; bv=bb->a; 82 83 } else { 84 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 85 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 86 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 87 if (A->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->bs); 88 nz = aa->nz + bb->nz; 89 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 90 garray = mat->garray; 91 av=aa->a; bv=bb->a; 92 } 93 94 if (!valOnly){ 95 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 96 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 97 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 98 *r = row; *c = col; *v = val; 99 } else { 100 row = *r; col = *c; val = *v; 101 } 102 *nnz = nz; 103 104 jj = 0; irow = rstart; 105 for ( i=0; i<m; i++ ) { 106 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 107 countA = ai[i+1] - ai[i]; 108 countB = bi[i+1] - bi[i]; 109 bjj = bj + bi[i]; 110 111 /* get jB, the starting local col index for the 2nd B-part */ 112 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 113 j=-1; 114 do { 115 j++; 116 if (j == countB) break; 117 jcol = garray[bjj[j]]; 118 } while (jcol < colA_start); 119 jB = j; 120 121 /* B-part, smaller col index */ 122 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 123 for (j=0; j<jB; j++){ 124 jcol = garray[bjj[j]]; 125 if (!valOnly){ 126 row[jj] = irow + shift; col[jj] = jcol + shift; 127 128 } 129 val[jj++] = *bv++; 130 } 131 /* A-part */ 132 for (j=0; j<countA; j++){ 133 if (!valOnly){ 134 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 135 } 136 val[jj++] = *av++; 137 } 138 /* B-part, larger col index */ 139 for (j=jB; j<countB; j++){ 140 if (!valOnly){ 141 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 142 } 143 val[jj++] = *bv++; 144 } 145 irow++; 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 EXTERN_C_BEGIN 152 #undef __FUNCT__ 153 #define __FUNCT__ "MatConvert_MUMPS_Base" 154 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \ 155 { 156 PetscErrorCode ierr; 157 Mat B=*newmat; 158 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 159 void (*f)(void); 160 161 PetscFunctionBegin; 162 if (reuse == MAT_INITIAL_MATRIX) { 163 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 164 } 165 B->ops->duplicate = mumps->MatDuplicate; 166 B->ops->view = mumps->MatView; 167 B->ops->assemblyend = mumps->MatAssemblyEnd; 168 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 169 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 170 B->ops->destroy = mumps->MatDestroy; 171 172 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 173 if (f) { 174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(FCNVOID)mumps->MatPreallocate);CHKERRQ(ierr); 175 } 176 ierr = PetscFree(mumps);CHKERRQ(ierr); 177 178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 186 187 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 188 *newmat = B; 189 PetscFunctionReturn(0); 190 } 191 EXTERN_C_END 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatDestroy_MUMPS" 195 PetscErrorCode MatDestroy_MUMPS(Mat A) 196 { 197 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 198 PetscErrorCode ierr; 199 int size=lu->size; 200 PetscErrorCode (*specialdestroy)(Mat); 201 PetscFunctionBegin; 202 if (lu->CleanUpMUMPS) { 203 /* Terminate instance, deallocate memories */ 204 lu->id.job=JOB_END; 205 #if defined(PETSC_USE_COMPLEX) 206 zmumps_c(&lu->id); 207 #else 208 dmumps_c(&lu->id); 209 #endif 210 if (lu->irn) { 211 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 212 } 213 if (lu->jcn) { 214 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 215 } 216 if (size>1 && lu->val) { 217 ierr = PetscFree(lu->val);CHKERRQ(ierr); 218 } 219 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 220 } 221 specialdestroy = lu->specialdestroy; 222 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 223 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 #undef __FUNCT__ 228 #define __FUNCT__ "MatDestroy_AIJMUMPS" 229 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 230 { 231 PetscErrorCode ierr; 232 int size; 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 236 if (size==1) { 237 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 238 } else { 239 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 246 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 247 { 248 PetscErrorCode ierr; 249 int size; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 253 if (size==1) { 254 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 255 } else { 256 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatFactorInfo_MUMPS" 263 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 264 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 265 PetscErrorCode ierr; 266 267 PetscFunctionBegin; 268 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 269 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 270 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 273 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 274 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 275 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 276 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 278 if (!lu->myid && lu->id.ICNTL(11)>0) { 279 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 280 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 281 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 282 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 283 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 284 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 285 286 } 287 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 288 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 289 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 292 293 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 294 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 296 297 /* infomation local to each processor */ 298 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 299 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 300 ierr = PetscSynchronizedFlush(A->comm); 301 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 302 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 303 ierr = PetscSynchronizedFlush(A->comm); 304 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 305 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 306 ierr = PetscSynchronizedFlush(A->comm); 307 308 if (!lu->myid){ /* information from the host */ 309 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 310 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 312 313 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 314 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 315 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 316 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 317 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 318 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 319 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 320 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 321 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 322 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 323 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 324 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 325 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 326 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 327 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 328 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 329 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 330 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 331 } 332 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatView_AIJMUMPS" 338 PetscErrorCode MatView_AIJMUMPS(Mat A,PetscViewer viewer) { 339 PetscErrorCode ierr; 340 PetscTruth iascii; 341 PetscViewerFormat format; 342 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 343 344 PetscFunctionBegin; 345 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 346 347 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 348 if (iascii) { 349 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 350 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 351 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 352 } 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatSolve_AIJMUMPS" 359 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 360 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 361 PetscScalar *array; 362 Vec x_seq; 363 IS iden; 364 VecScatter scat; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 if (lu->size > 1){ 369 if (!lu->myid){ 370 ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr); 371 ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr); 372 } else { 373 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 374 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 375 } 376 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 377 ierr = ISDestroy(iden);CHKERRQ(ierr); 378 379 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 380 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 381 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 382 } else { /* size == 1 */ 383 ierr = VecCopy(b,x);CHKERRQ(ierr); 384 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 385 } 386 if (!lu->myid) { /* define rhs on the host */ 387 #if defined(PETSC_USE_COMPLEX) 388 lu->id.rhs = (mumps_double_complex*)array; 389 #else 390 lu->id.rhs = array; 391 #endif 392 } 393 394 /* solve phase */ 395 lu->id.job=3; 396 #if defined(PETSC_USE_COMPLEX) 397 zmumps_c(&lu->id); 398 #else 399 dmumps_c(&lu->id); 400 #endif 401 if (lu->id.INFOG(1) < 0) { 402 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 403 } 404 405 /* convert mumps solution x_seq to petsc mpi x */ 406 if (lu->size > 1) { 407 if (!lu->myid){ 408 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 409 } 410 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 411 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 412 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 413 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 414 } else { 415 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 416 } 417 418 PetscFunctionReturn(0); 419 } 420 421 /* 422 input: 423 F: numeric factor 424 output: 425 nneg: total number of negative pivots 426 nzero: 0 427 npos: (global dimension of F) - nneg 428 */ 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 432 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 433 { 434 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 435 PetscErrorCode ierr; 436 int size; 437 438 PetscFunctionBegin; 439 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 440 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 441 if (size > 1 && lu->id.ICNTL(13) != 1){ 442 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 443 } 444 if (nneg){ 445 if (!lu->myid){ 446 *nneg = lu->id.INFOG(12); 447 } 448 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 449 } 450 if (nzero) *nzero = 0; 451 if (npos) *npos = F->M - (*nneg); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 457 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 458 { 459 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 460 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 461 PetscErrorCode ierr; 462 PetscInt rnz,nnz,nz,i,M=A->M,*ai,*aj,icntl; 463 PetscTruth valOnly,flg; 464 465 PetscFunctionBegin; 466 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 467 (*F)->ops->solve = MatSolve_AIJMUMPS; 468 469 /* Initialize a MUMPS instance */ 470 ierr = MPI_Comm_rank(A->comm, &lu->myid); 471 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 472 lua->myid = lu->myid; lua->size = lu->size; 473 lu->id.job = JOB_INIT; 474 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 475 lu->id.comm_fortran = lu->comm_mumps; 476 477 /* Set mumps options */ 478 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 479 lu->id.par=1; /* host participates factorizaton and solve */ 480 lu->id.sym=lu->sym; 481 if (lu->sym == 2){ 482 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 483 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 484 } 485 #if defined(PETSC_USE_COMPLEX) 486 zmumps_c(&lu->id); 487 #else 488 dmumps_c(&lu->id); 489 #endif 490 491 if (lu->size == 1){ 492 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 493 } else { 494 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 495 } 496 497 icntl=-1; 498 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 499 if ((flg && icntl > 0) || PetscLogPrintInfo) { 500 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 501 } else { /* no output */ 502 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 503 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 504 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 505 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 506 } 507 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 508 icntl=-1; 509 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 510 if (flg) { 511 if (icntl== 1){ 512 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 513 } else { 514 lu->id.ICNTL(7) = icntl; 515 } 516 } 517 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 518 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 519 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 520 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 521 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 522 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 523 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 524 525 /* 526 ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr); 527 if (flg){ 528 if (icntl >-1 && icntl <3 ){ 529 if (lu->myid==0) lu->id.ICNTL(16) = icntl; 530 } else { 531 SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl); 532 } 533 } 534 */ 535 536 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 537 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 538 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 539 PetscOptionsEnd(); 540 } 541 542 /* define matrix A */ 543 switch (lu->id.ICNTL(18)){ 544 case 0: /* centralized assembled matrix input (size=1) */ 545 if (!lu->myid) { 546 if (lua->isAIJ){ 547 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 548 nz = aa->nz; 549 ai = aa->i; aj = aa->j; lu->val = aa->a; 550 } else { 551 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 552 nz = aa->nz; 553 ai = aa->i; aj = aa->j; lu->val = aa->a; 554 } 555 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 556 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 557 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 558 nz = 0; 559 for (i=0; i<M; i++){ 560 rnz = ai[i+1] - ai[i]; 561 while (rnz--) { /* Fortran row/col index! */ 562 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 563 } 564 } 565 } 566 } 567 break; 568 case 3: /* distributed assembled matrix input (size>1) */ 569 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 570 valOnly = PETSC_FALSE; 571 } else { 572 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 573 } 574 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 575 break; 576 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 577 } 578 579 /* analysis phase */ 580 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 581 lu->id.n = M; 582 switch (lu->id.ICNTL(18)){ 583 case 0: /* centralized assembled matrix input */ 584 if (!lu->myid) { 585 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 586 if (lu->id.ICNTL(6)>1){ 587 #if defined(PETSC_USE_COMPLEX) 588 lu->id.a = (mumps_double_complex*)lu->val; 589 #else 590 lu->id.a = lu->val; 591 #endif 592 } 593 } 594 break; 595 case 3: /* distributed assembled matrix input (size>1) */ 596 lu->id.nz_loc = nnz; 597 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 598 if (lu->id.ICNTL(6)>1) { 599 #if defined(PETSC_USE_COMPLEX) 600 lu->id.a_loc = (mumps_double_complex*)lu->val; 601 #else 602 lu->id.a_loc = lu->val; 603 #endif 604 } 605 break; 606 } 607 lu->id.job=1; 608 #if defined(PETSC_USE_COMPLEX) 609 zmumps_c(&lu->id); 610 #else 611 dmumps_c(&lu->id); 612 #endif 613 if (lu->id.INFOG(1) < 0) { 614 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 615 } 616 } 617 618 /* numerical factorization phase */ 619 if(!lu->id.ICNTL(18)) { 620 if (!lu->myid) { 621 #if defined(PETSC_USE_COMPLEX) 622 lu->id.a = (mumps_double_complex*)lu->val; 623 #else 624 lu->id.a = lu->val; 625 #endif 626 } 627 } else { 628 #if defined(PETSC_USE_COMPLEX) 629 lu->id.a_loc = (mumps_double_complex*)lu->val; 630 #else 631 lu->id.a_loc = lu->val; 632 #endif 633 } 634 lu->id.job=2; 635 #if defined(PETSC_USE_COMPLEX) 636 zmumps_c(&lu->id); 637 #else 638 dmumps_c(&lu->id); 639 #endif 640 if (lu->id.INFOG(1) < 0) { 641 if (lu->id.INFO(1) == -13) { 642 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 643 } else { 644 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 645 } 646 } 647 648 if (!lu->myid && lu->id.ICNTL(16) > 0){ 649 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 650 } 651 652 (*F)->assembled = PETSC_TRUE; 653 lu->matstruc = SAME_NONZERO_PATTERN; 654 lu->CleanUpMUMPS = PETSC_TRUE; 655 PetscFunctionReturn(0); 656 } 657 658 /* Note the Petsc r and c permutations are ignored */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 661 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 662 Mat B; 663 Mat_MUMPS *lu; 664 PetscErrorCode ierr; 665 666 PetscFunctionBegin; 667 668 /* Create the factorization matrix */ 669 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 670 ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr); 671 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 672 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 673 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 674 675 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 676 B->factor = FACTOR_LU; 677 lu = (Mat_MUMPS*)B->spptr; 678 lu->sym = 0; 679 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 680 681 *F = B; 682 PetscFunctionReturn(0); 683 } 684 685 /* Note the Petsc r permutation is ignored */ 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 688 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 689 Mat B; 690 Mat_MUMPS *lu; 691 PetscErrorCode ierr; 692 693 PetscFunctionBegin; 694 /* Create the factorization matrix */ 695 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 696 ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr); 697 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 698 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 699 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 700 701 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 702 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 703 B->factor = FACTOR_CHOLESKY; 704 lu = (Mat_MUMPS*)B->spptr; 705 lu->sym = 2; 706 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 707 708 *F = B; 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 714 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 715 PetscErrorCode ierr; 716 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 717 718 PetscFunctionBegin; 719 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 720 721 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 722 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 723 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 724 PetscFunctionReturn(0); 725 } 726 727 EXTERN_C_BEGIN 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 730 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 731 { 732 PetscErrorCode ierr; 733 PetscMPIInt size; 734 MPI_Comm comm; 735 Mat B=*newmat; 736 Mat_MUMPS *mumps; 737 738 PetscFunctionBegin; 739 if (reuse == MAT_INITIAL_MATRIX) { 740 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 741 } 742 743 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 744 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 745 746 mumps->MatDuplicate = A->ops->duplicate; 747 mumps->MatView = A->ops->view; 748 mumps->MatAssemblyEnd = A->ops->assemblyend; 749 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 750 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 751 mumps->MatDestroy = A->ops->destroy; 752 mumps->specialdestroy = MatDestroy_AIJMUMPS; 753 mumps->CleanUpMUMPS = PETSC_FALSE; 754 mumps->isAIJ = PETSC_TRUE; 755 756 B->spptr = (void*)mumps; 757 B->ops->duplicate = MatDuplicate_MUMPS; 758 B->ops->view = MatView_AIJMUMPS; 759 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 760 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 761 B->ops->destroy = MatDestroy_MUMPS; 762 763 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 764 if (size == 1) { 765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 766 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 768 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 769 } else { 770 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 771 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 773 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 774 } 775 776 ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for LU factorization and solves.\n"));CHKERRQ(ierr); 777 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 778 *newmat = B; 779 PetscFunctionReturn(0); 780 } 781 EXTERN_C_END 782 783 /*MC 784 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 785 and sequential matrices via the external package MUMPS. 786 787 If MUMPS is installed (see the manual for instructions 788 on how to declare the existence of external packages), 789 a matrix type can be constructed which invokes MUMPS solvers. 790 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 791 792 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 793 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 794 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 795 for communicators controlling multiple processes. It is recommended that you call both of 796 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 797 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 798 without data copy. 799 800 Options Database Keys: 801 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 802 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 803 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 804 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 805 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 806 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 807 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 808 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 809 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 810 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 811 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 812 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 813 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 814 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 815 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 816 817 Level: beginner 818 819 .seealso: MATSBAIJMUMPS 820 M*/ 821 822 EXTERN_C_BEGIN 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatCreate_AIJMUMPS" 825 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 826 { 827 PetscErrorCode ierr; 828 int size; 829 Mat A_diag; 830 MPI_Comm comm; 831 832 PetscFunctionBegin; 833 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 834 /* and AIJMUMPS types */ 835 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 836 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 837 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 838 if (size == 1) { 839 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 840 } else { 841 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 842 A_diag = ((Mat_MPIAIJ *)A->data)->A; 843 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 844 } 845 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848 EXTERN_C_END 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 852 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 853 { 854 PetscErrorCode ierr; 855 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 856 857 PetscFunctionBegin; 858 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 859 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 860 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 861 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 862 PetscFunctionReturn(0); 863 } 864 865 EXTERN_C_BEGIN 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 868 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 869 { 870 Mat A; 871 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 872 PetscErrorCode ierr; 873 874 PetscFunctionBegin; 875 /* 876 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 877 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 878 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 879 block size info so that PETSc can determine the local size properly. The block size info is set 880 in the preallocation routine. 881 */ 882 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 883 A = ((Mat_MPISBAIJ *)B->data)->A; 884 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 885 PetscFunctionReturn(0); 886 } 887 EXTERN_C_END 888 889 EXTERN_C_BEGIN 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 892 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 893 { 894 PetscErrorCode ierr; 895 PetscMPIInt size; 896 MPI_Comm comm; 897 Mat B=*newmat; 898 Mat_MUMPS *mumps; 899 void (*f)(void); 900 901 PetscFunctionBegin; 902 if (reuse == MAT_INITIAL_MATRIX) { 903 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 904 } 905 906 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 907 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 908 909 mumps->MatDuplicate = A->ops->duplicate; 910 mumps->MatView = A->ops->view; 911 mumps->MatAssemblyEnd = A->ops->assemblyend; 912 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 913 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 914 mumps->MatDestroy = A->ops->destroy; 915 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 916 mumps->CleanUpMUMPS = PETSC_FALSE; 917 mumps->isAIJ = PETSC_FALSE; 918 919 B->spptr = (void*)mumps; 920 B->ops->duplicate = MatDuplicate_MUMPS; 921 B->ops->view = MatView_AIJMUMPS; 922 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 923 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 924 B->ops->destroy = MatDestroy_MUMPS; 925 926 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 927 if (size == 1) { 928 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 929 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 930 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 931 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 932 } else { 933 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 934 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 935 if (f) { /* This case should always be true when this routine is called */ 936 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 937 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 938 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 939 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 940 } 941 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 942 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 943 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 944 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 945 } 946 947 ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for Cholesky factorization and solves.\n"));CHKERRQ(ierr); 948 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 949 *newmat = B; 950 PetscFunctionReturn(0); 951 } 952 EXTERN_C_END 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatDuplicate_MUMPS" 956 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 957 PetscErrorCode ierr; 958 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 959 960 PetscFunctionBegin; 961 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 962 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 /*MC 967 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 968 distributed and sequential matrices via the external package MUMPS. 969 970 If MUMPS is installed (see the manual for instructions 971 on how to declare the existence of external packages), 972 a matrix type can be constructed which invokes MUMPS solvers. 973 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 974 975 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 976 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 977 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 978 for communicators controlling multiple processes. It is recommended that you call both of 979 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 980 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 981 without data copy. 982 983 Options Database Keys: 984 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 985 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 986 . -mat_mumps_icntl_4 <0,...,4> - print level 987 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 988 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 989 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 990 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 991 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 992 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 993 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 994 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 995 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 996 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 997 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 998 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 999 1000 Level: beginner 1001 1002 .seealso: MATAIJMUMPS 1003 M*/ 1004 1005 EXTERN_C_BEGIN 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1008 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1009 { 1010 PetscErrorCode ierr; 1011 int size; 1012 1013 PetscFunctionBegin; 1014 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1015 /* and SBAIJMUMPS types */ 1016 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1017 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1018 if (size == 1) { 1019 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1020 } else { 1021 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1022 } 1023 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 EXTERN_C_END 1027