1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 PetscMPIInt myid,size; 36 int *irn,*jcn,sym; 37 PetscScalar *val; 38 MPI_Comm comm_mumps; 39 40 PetscTruth isAIJ,CleanUpMUMPS; 41 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 42 PetscErrorCode (*MatView)(Mat,PetscViewer); 43 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 44 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatDestroy)(Mat); 47 PetscErrorCode (*specialdestroy)(Mat); 48 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 49 } Mat_MUMPS; 50 51 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 52 EXTERN_C_BEGIN 53 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*); 54 EXTERN_C_END 55 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 56 /* 57 input: 58 A - matrix in mpiaij or mpisbaij (bs=1) format 59 shift - 0: C style output triple; 1: Fortran style output triple. 
60 valOnly - FALSE: spaces are allocated and values are set for the triple 61 TRUE: only the values in v array are updated 62 output: 63 nnz - dim of r, c, and v (number of local nonzero entries of A) 64 r, c, v - row and col index, matrix values (matrix triples) 65 */ 66 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 67 PetscInt *ai, *aj, *bi, *bj, rstart,nz, *garray; 68 PetscErrorCode ierr; 69 PetscInt i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol; 70 PetscInt *row,*col; 71 PetscScalar *av, *bv,*val; 72 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 73 74 PetscFunctionBegin; 75 if (mumps->isAIJ){ 76 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 77 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 78 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 79 nz = aa->nz + bb->nz; 80 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 81 garray = mat->garray; 82 av=aa->a; bv=bb->a; 83 84 } else { 85 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 86 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 87 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 88 if (A->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->bs); 89 nz = aa->nz + bb->nz; 90 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 91 garray = mat->garray; 92 av=aa->a; bv=bb->a; 93 } 94 95 if (!valOnly){ 96 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 97 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 98 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 99 *r = row; *c = col; *v = val; 100 } else { 101 row = *r; col = *c; val = *v; 102 } 103 *nnz = nz; 104 105 jj = 0; irow = rstart; 106 for ( i=0; i<m; i++ ) { 107 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 108 countA = ai[i+1] - ai[i]; 109 countB = bi[i+1] - bi[i]; 110 bjj = bj + bi[i]; 111 112 /* get jB, the starting local col index for the 2nd B-part */ 113 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 
114 j=-1; 115 do { 116 j++; 117 if (j == countB) break; 118 jcol = garray[bjj[j]]; 119 } while (jcol < colA_start); 120 jB = j; 121 122 /* B-part, smaller col index */ 123 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 124 for (j=0; j<jB; j++){ 125 jcol = garray[bjj[j]]; 126 if (!valOnly){ 127 row[jj] = irow + shift; col[jj] = jcol + shift; 128 129 } 130 val[jj++] = *bv++; 131 } 132 /* A-part */ 133 for (j=0; j<countA; j++){ 134 if (!valOnly){ 135 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 136 } 137 val[jj++] = *av++; 138 } 139 /* B-part, larger col index */ 140 for (j=jB; j<countB; j++){ 141 if (!valOnly){ 142 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 143 } 144 val[jj++] = *bv++; 145 } 146 irow++; 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 EXTERN_C_BEGIN 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatConvert_MUMPS_Base" 155 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \ 156 { 157 PetscErrorCode ierr; 158 Mat B=*newmat; 159 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 160 void (*f)(void); 161 162 PetscFunctionBegin; 163 if (reuse == MAT_INITIAL_MATRIX) { 164 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 165 } 166 B->ops->duplicate = mumps->MatDuplicate; 167 B->ops->view = mumps->MatView; 168 B->ops->assemblyend = mumps->MatAssemblyEnd; 169 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 170 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 171 B->ops->destroy = mumps->MatDestroy; 172 173 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidFunction)&f);CHKERRQ(ierr); 174 if (f) { 175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(FCNVOID)mumps->MatPreallocate);CHKERRQ(ierr); 176 } 177 ierr = PetscFree(mumps);CHKERRQ(ierr); 178 179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 180 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 186 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 187 188 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 189 *newmat = B; 190 PetscFunctionReturn(0); 191 } 192 EXTERN_C_END 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatDestroy_MUMPS" 196 PetscErrorCode MatDestroy_MUMPS(Mat A) 197 { 198 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 199 PetscErrorCode ierr; 200 PetscMPIInt size=lu->size; 201 PetscErrorCode (*specialdestroy)(Mat); 202 PetscFunctionBegin; 203 if (lu->CleanUpMUMPS) { 204 /* Terminate instance, deallocate memories */ 205 lu->id.job=JOB_END; 206 #if defined(PETSC_USE_COMPLEX) 207 zmumps_c(&lu->id); 208 #else 209 dmumps_c(&lu->id); 210 #endif 211 if (lu->irn) { 212 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 213 } 214 if (lu->jcn) { 215 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 216 } 217 if (size>1 && lu->val) { 218 ierr = PetscFree(lu->val);CHKERRQ(ierr); 219 } 220 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 221 } 222 specialdestroy = lu->specialdestroy; 223 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 224 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 #undef __FUNCT__ 229 #define __FUNCT__ "MatDestroy_AIJMUMPS" 230 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 
231 { 232 PetscErrorCode ierr; 233 int size; 234 235 PetscFunctionBegin; 236 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 237 if (size==1) { 238 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 239 } else { 240 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 241 } 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 247 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 248 { 249 PetscErrorCode ierr; 250 int size; 251 252 PetscFunctionBegin; 253 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 254 if (size==1) { 255 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 256 } else { 257 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatFactorInfo_MUMPS" 264 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 265 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 266 PetscErrorCode ierr; 267 268 PetscFunctionBegin; 269 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 270 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 273 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 274 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 275 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 276 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d 
\n",lu->id.ICNTL(10));CHKERRQ(ierr); 278 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 279 if (!lu->myid && lu->id.ICNTL(11)>0) { 280 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 281 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 282 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 283 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 284 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 285 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 286 287 } 288 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 289 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 292 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 293 294 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 296 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 297 298 /* infomation local to each processor */ 299 if (!lu->myid) ierr = 
PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 300 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 301 ierr = PetscSynchronizedFlush(A->comm); 302 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 303 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 304 ierr = PetscSynchronizedFlush(A->comm); 305 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 306 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 307 ierr = PetscSynchronizedFlush(A->comm); 308 309 if (!lu->myid){ /* information from the host */ 310 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 312 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 313 314 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 315 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 316 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 317 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 318 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 319 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 320 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 321 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 322 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 323 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 324 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 325 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 326 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 327 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 328 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 329 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 330 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all 
MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 331 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 332 } 333 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatView_MUMPS" 339 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) { 340 PetscErrorCode ierr; 341 PetscTruth iascii; 342 PetscViewerFormat format; 343 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 344 345 PetscFunctionBegin; 346 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 347 348 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 349 if (iascii) { 350 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 351 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 352 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 353 } 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatSolve_AIJMUMPS" 360 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 361 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 362 PetscScalar *array; 363 Vec x_seq; 364 IS iden; 365 VecScatter scat; 366 PetscErrorCode ierr; 367 368 PetscFunctionBegin; 369 if (lu->size > 1){ 370 if (!lu->myid){ 371 ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr); 372 ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr); 373 } else { 374 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 375 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 376 } 377 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 378 ierr = ISDestroy(iden);CHKERRQ(ierr); 379 380 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 381 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 382 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 383 } else { /* size == 1 */ 384 ierr = 
VecCopy(b,x);CHKERRQ(ierr); 385 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 386 } 387 if (!lu->myid) { /* define rhs on the host */ 388 #if defined(PETSC_USE_COMPLEX) 389 lu->id.rhs = (mumps_double_complex*)array; 390 #else 391 lu->id.rhs = array; 392 #endif 393 } 394 395 /* solve phase */ 396 lu->id.job=3; 397 #if defined(PETSC_USE_COMPLEX) 398 zmumps_c(&lu->id); 399 #else 400 dmumps_c(&lu->id); 401 #endif 402 if (lu->id.INFOG(1) < 0) { 403 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 404 } 405 406 /* convert mumps solution x_seq to petsc mpi x */ 407 if (lu->size > 1) { 408 if (!lu->myid){ 409 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 410 } 411 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 412 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 413 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 414 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 415 } else { 416 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 417 } 418 419 PetscFunctionReturn(0); 420 } 421 422 /* 423 input: 424 F: numeric factor 425 output: 426 nneg: total number of negative pivots 427 nzero: 0 428 npos: (global dimension of F) - nneg 429 */ 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 433 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 434 { 435 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 436 PetscErrorCode ierr; 437 PetscMPIInt size; 438 439 PetscFunctionBegin; 440 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 441 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 442 if (size > 1 && lu->id.ICNTL(13) != 1){ 443 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 444 } 445 if (nneg){ 446 if (!lu->myid){ 447 *nneg = lu->id.INFOG(12); 448 } 449 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 450 } 451 if (nzero) *nzero = 0; 452 if (npos) *npos = F->M - (*nneg); 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 458 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 459 { 460 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 461 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 462 PetscErrorCode ierr; 463 PetscInt rnz,nnz,nz=0,i,M=A->M,*ai,*aj,icntl; 464 PetscTruth valOnly,flg; 465 Mat F_diag; 466 467 PetscFunctionBegin; 468 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 469 (*F)->ops->solve = MatSolve_AIJMUMPS; 470 471 /* Initialize a MUMPS instance */ 472 ierr = MPI_Comm_rank(A->comm, &lu->myid); 473 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 474 lua->myid = lu->myid; lua->size = lu->size; 475 lu->id.job = JOB_INIT; 476 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 477 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 478 479 /* Set mumps options */ 480 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 481 lu->id.par=1; /* host participates factorizaton and solve */ 482 lu->id.sym=lu->sym; 483 if (lu->sym == 2){ 484 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 485 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 486 } 487 #if defined(PETSC_USE_COMPLEX) 488 zmumps_c(&lu->id); 489 #else 490 dmumps_c(&lu->id); 491 #endif 492 493 if (lu->size == 1){ 494 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 495 } else { 496 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 497 } 498 499 icntl=-1; 500 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 
4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 501 if ((flg && icntl > 0) || PetscLogPrintInfo) { 502 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 503 } else { /* no output */ 504 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 505 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 506 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 507 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 508 } 509 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 510 icntl=-1; 511 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 512 if (flg) { 513 if (icntl== 1){ 514 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 515 } else { 516 lu->id.ICNTL(7) = icntl; 517 } 518 } 519 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 
1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 520 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 521 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 522 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 523 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 524 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 525 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 526 527 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 528 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 529 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 530 PetscOptionsEnd(); 531 } 532 533 /* define matrix A */ 534 switch (lu->id.ICNTL(18)){ 535 case 0: /* centralized assembled matrix input (size=1) */ 536 if (!lu->myid) { 537 if (lua->isAIJ){ 538 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 539 nz = aa->nz; 540 ai = aa->i; aj = aa->j; lu->val = aa->a; 541 } else { 542 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 543 nz = aa->nz; 544 ai = aa->i; aj = aa->j; lu->val = aa->a; 545 } 546 if (lu->matstruc == 
DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 547 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 548 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 549 nz = 0; 550 for (i=0; i<M; i++){ 551 rnz = ai[i+1] - ai[i]; 552 while (rnz--) { /* Fortran row/col index! */ 553 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 554 } 555 } 556 } 557 } 558 break; 559 case 3: /* distributed assembled matrix input (size>1) */ 560 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 561 valOnly = PETSC_FALSE; 562 } else { 563 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 564 } 565 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 566 break; 567 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 568 } 569 570 /* analysis phase */ 571 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 572 lu->id.n = M; 573 switch (lu->id.ICNTL(18)){ 574 case 0: /* centralized assembled matrix input */ 575 if (!lu->myid) { 576 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 577 if (lu->id.ICNTL(6)>1){ 578 #if defined(PETSC_USE_COMPLEX) 579 lu->id.a = (mumps_double_complex*)lu->val; 580 #else 581 lu->id.a = lu->val; 582 #endif 583 } 584 } 585 break; 586 case 3: /* distributed assembled matrix input (size>1) */ 587 lu->id.nz_loc = nnz; 588 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 589 if (lu->id.ICNTL(6)>1) { 590 #if defined(PETSC_USE_COMPLEX) 591 lu->id.a_loc = (mumps_double_complex*)lu->val; 592 #else 593 lu->id.a_loc = lu->val; 594 #endif 595 } 596 break; 597 } 598 lu->id.job=1; 599 #if defined(PETSC_USE_COMPLEX) 600 zmumps_c(&lu->id); 601 #else 602 dmumps_c(&lu->id); 603 #endif 604 if (lu->id.INFOG(1) < 0) { 605 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 606 } 607 } 608 609 /* numerical factorization phase */ 610 if(!lu->id.ICNTL(18)) { 611 if (!lu->myid) { 612 #if 
defined(PETSC_USE_COMPLEX) 613 lu->id.a = (mumps_double_complex*)lu->val; 614 #else 615 lu->id.a = lu->val; 616 #endif 617 } 618 } else { 619 #if defined(PETSC_USE_COMPLEX) 620 lu->id.a_loc = (mumps_double_complex*)lu->val; 621 #else 622 lu->id.a_loc = lu->val; 623 #endif 624 } 625 lu->id.job=2; 626 #if defined(PETSC_USE_COMPLEX) 627 zmumps_c(&lu->id); 628 #else 629 dmumps_c(&lu->id); 630 #endif 631 if (lu->id.INFOG(1) < 0) { 632 if (lu->id.INFO(1) == -13) { 633 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 634 } else { 635 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 636 } 637 } 638 639 if (!lu->myid && lu->id.ICNTL(16) > 0){ 640 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 641 } 642 643 if (lu->size > 1){ 644 if ((*F)->factor == FACTOR_LU){ 645 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 646 } else { 647 F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A; 648 } 649 F_diag->assembled = PETSC_TRUE; 650 } 651 (*F)->assembled = PETSC_TRUE; 652 lu->matstruc = SAME_NONZERO_PATTERN; 653 lu->CleanUpMUMPS = PETSC_TRUE; 654 PetscFunctionReturn(0); 655 } 656 657 /* Note the Petsc r and c permutations are ignored */ 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 660 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 661 Mat B; 662 Mat_MUMPS *lu; 663 PetscErrorCode ierr; 664 665 PetscFunctionBegin; 666 /* Create the factorization matrix */ 667 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 668 ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr); 669 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 670 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 671 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 672 673 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 674 
B->factor = FACTOR_LU; 675 lu = (Mat_MUMPS*)B->spptr; 676 lu->sym = 0; 677 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 678 679 *F = B; 680 PetscFunctionReturn(0); 681 } 682 683 /* Note the Petsc r permutation is ignored */ 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 686 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 687 Mat B; 688 Mat_MUMPS *lu; 689 PetscErrorCode ierr; 690 691 PetscFunctionBegin; 692 /* Create the factorization matrix */ 693 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 694 ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr); 695 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 696 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 697 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 698 699 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 700 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 701 B->factor = FACTOR_CHOLESKY; 702 lu = (Mat_MUMPS*)B->spptr; 703 lu->sym = 2; 704 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 705 706 *F = B; 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 712 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 713 PetscErrorCode ierr; 714 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 715 716 PetscFunctionBegin; 717 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 718 719 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 720 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 721 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 722 PetscFunctionReturn(0); 723 } 724 725 EXTERN_C_BEGIN 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 728 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 729 { 730 PetscErrorCode ierr; 731 PetscMPIInt size; 732 MPI_Comm comm; 733 Mat B=*newmat; 734 Mat_MUMPS *mumps; 735 736 
PetscFunctionBegin; 737 if (reuse == MAT_INITIAL_MATRIX) { 738 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 739 } 740 741 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 742 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 743 744 mumps->MatDuplicate = A->ops->duplicate; 745 mumps->MatView = A->ops->view; 746 mumps->MatAssemblyEnd = A->ops->assemblyend; 747 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 748 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 749 mumps->MatDestroy = A->ops->destroy; 750 mumps->specialdestroy = MatDestroy_AIJMUMPS; 751 mumps->CleanUpMUMPS = PETSC_FALSE; 752 mumps->isAIJ = PETSC_TRUE; 753 754 B->spptr = (void*)mumps; 755 B->ops->duplicate = MatDuplicate_MUMPS; 756 B->ops->view = MatView_MUMPS; 757 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 758 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 759 B->ops->destroy = MatDestroy_MUMPS; 760 761 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 762 if (size == 1) { 763 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 764 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 766 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 767 } else { 768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 769 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 770 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 771 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 772 } 773 774 ierr = PetscVerboseInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for LU factorization and solves.\n"));CHKERRQ(ierr); 775 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 776 *newmat = B; 777 PetscFunctionReturn(0); 778 } 779 EXTERN_C_END 780 781 /*MC 782 MATAIJMUMPS - MATAIJMUMPS = 
"aijmumps" - A matrix type providing direct solvers (LU) for distributed
  and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).

  If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
  Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
  MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.  One can also call MatConvert for an in-place
  conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,1,2,3,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
.
-mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 811 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 812 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 813 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 814 815 Level: beginner 816 817 .seealso: MATSBAIJMUMPS 818 M*/ 819 820 EXTERN_C_BEGIN 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatCreate_AIJMUMPS" 823 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 824 { 825 PetscErrorCode ierr; 826 PetscMPIInt size; 827 Mat A_diag; 828 MPI_Comm comm; 829 830 PetscFunctionBegin; 831 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 832 /* and AIJMUMPS types */ 833 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 834 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 835 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 836 if (size == 1) { 837 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 838 } else { 839 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 840 A_diag = ((Mat_MPIAIJ *)A->data)->A; 841 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 842 } 843 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 EXTERN_C_END 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 850 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 851 { 852 PetscErrorCode ierr; 853 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 854 855 PetscFunctionBegin; 856 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 857 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 858 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 859 PetscFunctionReturn(0); 860 } 861 862 EXTERN_C_BEGIN 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 865 PetscErrorCode PETSCMAT_DLLEXPORT 
MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 866 { 867 Mat A; 868 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 869 PetscErrorCode ierr; 870 871 PetscFunctionBegin; 872 /* 873 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 874 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 875 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 876 block size info so that PETSc can determine the local size properly. The block size info is set 877 in the preallocation routine. 878 */ 879 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 880 A = ((Mat_MPISBAIJ *)B->data)->A; 881 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 EXTERN_C_END 885 886 EXTERN_C_BEGIN 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 889 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 890 { 891 PetscErrorCode ierr; 892 PetscMPIInt size; 893 MPI_Comm comm; 894 Mat B=*newmat; 895 Mat_MUMPS *mumps; 896 void (*f)(void); 897 898 PetscFunctionBegin; 899 if (reuse == MAT_INITIAL_MATRIX) { 900 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 901 } 902 903 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 904 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 905 906 mumps->MatDuplicate = A->ops->duplicate; 907 mumps->MatView = A->ops->view; 908 mumps->MatAssemblyEnd = A->ops->assemblyend; 909 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 910 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 911 mumps->MatDestroy = A->ops->destroy; 912 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 913 mumps->CleanUpMUMPS = PETSC_FALSE; 914 mumps->isAIJ = PETSC_FALSE; 915 916 B->spptr = (void*)mumps; 917 
B->ops->duplicate = MatDuplicate_MUMPS; 918 B->ops->view = MatView_MUMPS; 919 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 920 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 921 B->ops->destroy = MatDestroy_MUMPS; 922 923 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 924 if (size == 1) { 925 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 926 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 927 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 928 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 929 } else { 930 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 931 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidFunction)&f);CHKERRQ(ierr); 932 if (f) { /* This case should always be true when this routine is called */ 933 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 934 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 935 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 936 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 937 } 938 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 939 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 940 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 941 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 942 } 943 944 ierr = PetscVerboseInfo((0,"MatConvert_SBAIJ_SBAIJMUMPS:Using MUMPS for Cholesky factorization and solves.\n"));CHKERRQ(ierr); 945 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 946 *newmat = B; 947 PetscFunctionReturn(0); 948 } 949 EXTERN_C_END 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatDuplicate_MUMPS" 953 PetscErrorCode MatDuplicate_MUMPS(Mat A, 
MatDuplicateOption op, Mat *M) { 954 PetscErrorCode ierr; 955 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 956 957 PetscFunctionBegin; 958 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 959 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 /*MC 964 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 965 distributed and sequential matrices via the external package MUMPS. 966 967 If MUMPS is installed (see the manual for instructions 968 on how to declare the existence of external packages), 969 a matrix type can be constructed which invokes MUMPS solvers. 970 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 971 972 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 973 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 974 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 975 for communicators controlling multiple processes. It is recommended that you call both of 976 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 977 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 978 without data copy. 979 980 Options Database Keys: 981 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 982 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 983 . -mat_mumps_icntl_4 <0,...,4> - print level 984 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 985 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 986 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 987 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 988 . 
-mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 989 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 990 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 991 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 992 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 993 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 994 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 995 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 996 997 Level: beginner 998 999 .seealso: MATAIJMUMPS 1000 M*/ 1001 1002 EXTERN_C_BEGIN 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1005 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1006 { 1007 PetscErrorCode ierr; 1008 PetscMPIInt size; 1009 1010 PetscFunctionBegin; 1011 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1012 /* and SBAIJMUMPS types */ 1013 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1014 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1015 if (size == 1) { 1016 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1017 } else { 1018 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1019 } 1020 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 EXTERN_C_END 1024