1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->rmap.n; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 252 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 253 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 254 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 310 nzi += nlnk; 311 312 /* add pivot rows into linked list */ 313 row = lnk[n]; 314 while (row < i) { 315 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 316 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 317 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 318 nzi += nlnk; 319 row = lnk[row]; 320 } 321 bi[i+1] = bi[i] + nzi; 322 im[i] = nzi; 323 324 /* mark bdiag */ 325 nzbd = 0; 326 nnz = nzi; 327 k = lnk[n]; 328 while (nnz-- && k < i){ 329 nzbd++; 330 k = lnk[k]; 331 } 332 bdiag[i] = bi[i] + nzbd; 333 334 /* if free space is not available, make more free space */ 335 if (current_space->local_remaining<nzi) { 336 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 337 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 338 reallocs++; 339 } 340 341 /* copy data into free space, then initialize lnk */ 342 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 343 bi_ptr[i] = current_space->array; 344 current_space->array += nzi; 345 current_space->local_used += nzi; 346 current_space->local_remaining -= nzi; 347 } 348 #if defined(PETSC_USE_INFO) 349 if (ai[n] != 0) { 350 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 351 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 352 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 353 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 354 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 355 } else { 356 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 357 } 358 #endif 359 360 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 361 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 362 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 365 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 366 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 367 ierr = PetscFree(im);CHKERRQ(ierr); 368 369 /* put together the new matrix */ 370 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 371 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 372 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 373 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 374 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 375 b = (Mat_SeqAIJ*)(*B)->data; 376 b->freedata = PETSC_TRUE; 377 b->singlemalloc = PETSC_FALSE; 378 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 379 b->j = bj; 380 b->i = bi; 381 b->diag = bdiag; 382 b->ilen = 0; 383 b->imax = 0; 384 b->row = isrow; 385 b->col = iscol; 386 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 387 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 388 b->icol = isicol; 389 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 390 391 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 392 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 393 b->maxnz = b->nz = bi[n] ; 394 395 (*B)->factor = FACTOR_LU; 396 (*B)->info.factor_mallocs = reallocs; 397 (*B)->info.fill_ratio_given = f; 398 399 if (ai[n] != 0) { 400 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 401 } else { 402 (*B)->info.fill_ratio_needed = 0.0; 403 } 404 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 405 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 406 PetscFunctionReturn(0); 407 } 408 409 /* 410 Trouble in factorization, should we dump the original matrix? 411 */ 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatFactorDumpMatrix" 414 PetscErrorCode MatFactorDumpMatrix(Mat A) 415 { 416 PetscErrorCode ierr; 417 PetscTruth flg; 418 419 PetscFunctionBegin; 420 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 421 if (flg) { 422 PetscViewer viewer; 423 char filename[PETSC_MAX_PATH_LEN]; 424 425 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 426 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 427 ierr = MatView(A,viewer);CHKERRQ(ierr); 428 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 429 } 430 PetscFunctionReturn(0); 431 } 432 433 /* ----------------------------------------------------------- */ 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 436 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 437 { 438 Mat C=*B; 439 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 440 IS isrow = b->row,isicol = b->icol; 441 PetscErrorCode ierr; 442 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 443 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 444 PetscInt *diag_offset = b->diag,diag,*pj; 445 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 446 PetscScalar d; 447 PetscReal rs; 448 LUShift_Ctx sctx; 449 PetscInt newshift,*ddiag; 450 451 PetscFunctionBegin; 452 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 453 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 454 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 455 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 456 rtmps = rtmp; ics = ic; 457 458 sctx.shift_top = 0; 459 sctx.nshift_max = 0; 460 sctx.shift_lo = 0; 461 sctx.shift_hi = 0; 462 463 if (!a->diag) { 464 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 465 } 466 /* if both shift schemes are chosen by user, only use info->shiftpd */ 467 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 468 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 469 PetscInt *aai = a->i; 470 ddiag = a->diag; 471 sctx.shift_top = 0; 472 for (i=0; i<n; i++) { 473 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 474 d = (a->a)[ddiag[i]]; 475 rs = -PetscAbsScalar(d) - PetscRealPart(d); 476 v = a->a+aai[i]; 477 nz = aai[i+1] - aai[i]; 478 for (j=0; j<nz; j++) 479 rs += PetscAbsScalar(v[j]); 480 if (rs>sctx.shift_top) sctx.shift_top = rs; 481 } 482 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 483 sctx.shift_top *= 1.1; 484 sctx.nshift_max = 5; 485 sctx.shift_lo = 0.; 486 sctx.shift_hi = 1.; 487 } 488 489 sctx.shift_amount = 0; 490 sctx.nshift = 0; 491 do { 492 sctx.lushift = PETSC_FALSE; 493 for (i=0; i<n; i++){ 494 nz = bi[i+1] - bi[i]; 495 bjtmp = bj + bi[i]; 496 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 497 498 /* load in initial (unfactored row) */ 499 nz = a->i[r[i]+1] - a->i[r[i]]; 500 ajtmp = a->j + a->i[r[i]]; 501 v = a->a + a->i[r[i]]; 502 for (j=0; j<nz; j++) { 503 rtmp[ics[ajtmp[j]]] = v[j]; 504 } 505 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 506 507 row = *bjtmp++; 508 while (row < i) { 509 pc = rtmp + row; 510 if (*pc != 0.0) { 511 pv = b->a + diag_offset[row]; 512 pj = b->j + diag_offset[row] + 1; 513 multiplier = *pc / *pv++; 514 *pc = multiplier; 515 nz = bi[row+1] - diag_offset[row] - 1; 516 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 517 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 518 } 519 row = *bjtmp++; 520 } 521 /* finished row so stick it into b->a */ 522 pv = b->a + bi[i] ; 523 pj = b->j + bi[i] ; 524 nz = bi[i+1] - bi[i]; 525 diag = diag_offset[i] - bi[i]; 526 rs = 0.0; 527 for (j=0; j<nz; j++) { 528 pv[j] = rtmps[pj[j]]; 529 if (j != diag) rs += PetscAbsScalar(pv[j]); 530 } 531 532 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 533 sctx.rs = rs; 534 sctx.pv = pv[diag]; 535 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 536 if (newshift == 1) break; 537 } 538 539 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 540 /* 541 * if no shift in this attempt & shifting & started shifting & can refine, 542 * then try lower shift 543 */ 544 sctx.shift_hi = info->shift_fraction; 545 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 546 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 547 sctx.lushift = PETSC_TRUE; 548 sctx.nshift++; 549 } 550 } while (sctx.lushift); 551 552 /* invert diagonal entries for simplier triangular solves */ 553 for (i=0; i<n; i++) { 554 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 555 } 556 557 ierr = PetscFree(rtmp);CHKERRQ(ierr); 558 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 559 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 560 C->factor = FACTOR_LU; 561 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 562 C->assembled = PETSC_TRUE; 563 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 564 if (sctx.nshift){ 565 if (info->shiftnz) { 566 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 567 } else if (info->shiftpd) { 568 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 569 } 570 } 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 576 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 577 { 578 PetscFunctionBegin; 579 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 580 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 581 PetscFunctionReturn(0); 582 } 583 584 585 /* ----------------------------------------------------------- */ 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatLUFactor_SeqAIJ" 588 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 589 { 590 PetscErrorCode ierr; 591 Mat C; 592 593 PetscFunctionBegin; 594 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 595 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 596 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 597 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 598 PetscFunctionReturn(0); 599 } 600 /* ----------------------------------------------------------- */ 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatSolve_SeqAIJ" 603 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 604 { 605 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 606 IS iscol = a->col,isrow = a->row; 607 PetscErrorCode ierr; 608 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 609 PetscInt nz,*rout,*cout; 610 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 611 612 PetscFunctionBegin; 613 if (!n) PetscFunctionReturn(0); 614 615 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 616 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 617 tmp = a->solve_work; 618 619 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 620 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 621 622 /* forward solve the lower triangular */ 623 tmp[0] = b[*r++]; 624 tmps = tmp; 625 for (i=1; i<n; i++) { 626 v = aa + ai[i] ; 627 vi = aj + ai[i] ; 628 nz = a->diag[i] - ai[i]; 629 sum = b[*r++]; 630 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 631 tmp[i] = sum; 632 } 633 634 /* backward solve the upper triangular */ 635 for (i=n-1; i>=0; i--){ 636 v = aa + a->diag[i] + 1; 637 vi = aj + a->diag[i] + 1; 638 nz = ai[i+1] - a->diag[i] - 1; 639 sum = tmp[i]; 640 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 641 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 642 } 643 644 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 645 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 646 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 647 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 648 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 649 PetscFunctionReturn(0); 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMatSolve_SeqAIJ" 654 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 655 { 656 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 657 IS iscol = a->col,isrow = a->row; 658 PetscErrorCode ierr; 659 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 660 PetscInt nz,*rout,*cout,neq; 661 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 662 663 PetscFunctionBegin; 664 if (!n) PetscFunctionReturn(0); 665 666 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 667 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 668 669 tmp = a->solve_work; 670 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 671 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 672 673 for (neq=0; neq<n; neq++){ 674 /* forward solve the lower triangular */ 675 tmp[0] = b[r[0]]; 676 tmps = tmp; 677 for (i=1; i<n; i++) { 678 v = aa + ai[i] ; 679 vi = aj + ai[i] ; 680 nz = a->diag[i] - ai[i]; 681 sum = b[r[i]]; 682 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 683 tmp[i] = sum; 684 } 685 /* backward solve the upper triangular */ 686 for (i=n-1; i>=0; i--){ 687 v = aa + a->diag[i] + 1; 688 vi = aj + a->diag[i] + 1; 689 nz = ai[i+1] - a->diag[i] - 1; 690 sum = tmp[i]; 691 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 692 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 693 } 694 695 b += n; 696 x += n; 697 } 698 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 700 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 701 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 702 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 /* ----------------------------------------------------------- */ 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 709 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 714 PetscScalar *x,*b,*aa = a->a; 715 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 716 PetscInt adiag_i,i,*vi,nz,ai_i; 717 PetscScalar *v,sum; 718 #endif 719 720 PetscFunctionBegin; 721 if (!n) PetscFunctionReturn(0); 722 723 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 725 726 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 727 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 728 #else 729 /* forward solve the lower triangular */ 730 x[0] = b[0]; 731 for (i=1; i<n; i++) { 732 ai_i = ai[i]; 733 v = aa + ai_i; 734 vi = aj + ai_i; 735 nz = adiag[i] - ai_i; 736 sum = b[i]; 737 while (nz--) sum -= *v++ * x[*vi++]; 738 x[i] = sum; 739 } 740 741 /* backward solve the upper triangular */ 742 for (i=n-1; i>=0; i--){ 743 adiag_i = adiag[i]; 744 v = aa + adiag_i + 1; 745 vi = aj + adiag_i + 1; 746 nz = ai[i+1] - adiag_i - 1; 747 sum = x[i]; 748 while (nz--) sum -= *v++ * x[*vi++]; 749 x[i] = sum*aa[adiag_i]; 750 } 751 #endif 752 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 753 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 754 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 760 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 761 { 762 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 763 IS iscol = a->col,isrow = a->row; 764 PetscErrorCode ierr; 765 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 766 PetscInt nz,*rout,*cout; 767 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 768 769 PetscFunctionBegin; 770 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 771 772 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 774 tmp = a->solve_work; 775 776 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 777 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 778 779 /* forward solve the lower triangular */ 780 tmp[0] = b[*r++]; 781 for (i=1; i<n; i++) { 782 v = aa + ai[i] ; 783 vi = aj + ai[i] ; 784 nz = a->diag[i] - ai[i]; 785 sum = b[*r++]; 786 while (nz--) sum -= *v++ * tmp[*vi++ ]; 787 tmp[i] = sum; 788 } 789 790 /* backward solve the upper triangular */ 791 for (i=n-1; i>=0; i--){ 792 v = aa + a->diag[i] + 1; 793 vi = aj + a->diag[i] + 1; 794 nz = ai[i+1] - a->diag[i] - 1; 795 sum = tmp[i]; 796 while (nz--) sum -= *v++ * tmp[*vi++ ]; 797 tmp[i] = sum*aa[a->diag[i]]; 798 x[*c--] += tmp[i]; 799 } 800 801 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 802 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 803 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 804 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 805 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 806 807 PetscFunctionReturn(0); 808 } 809 /* -------------------------------------------------------------------*/ 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 812 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 813 { 814 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 815 IS iscol = a->col,isrow = a->row; 816 PetscErrorCode ierr; 817 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 818 PetscInt nz,*rout,*cout,*diag = a->diag; 819 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 820 821 PetscFunctionBegin; 822 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 823 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 824 tmp = a->solve_work; 825 826 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 827 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 828 829 /* copy the b into temp work space according to permutation */ 830 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 831 832 /* forward solve the U^T */ 833 for (i=0; i<n; i++) { 834 v = aa + diag[i] ; 835 vi = aj + diag[i] + 1; 836 nz = ai[i+1] - diag[i] - 1; 837 s1 = tmp[i]; 838 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 839 while (nz--) { 840 tmp[*vi++ ] -= (*v++)*s1; 841 } 842 tmp[i] = s1; 843 } 844 845 /* backward solve the L^T */ 846 for (i=n-1; i>=0; i--){ 847 v = aa + diag[i] - 1 ; 848 vi = aj + diag[i] - 1 ; 849 nz = diag[i] - ai[i]; 850 s1 = tmp[i]; 851 while (nz--) { 852 tmp[*vi-- ] -= (*v--)*s1; 853 } 854 } 855 856 /* copy tmp into x according to permutation */ 857 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 858 859 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 860 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 861 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 863 864 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 870 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 871 { 872 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 873 IS iscol = a->col,isrow = a->row; 874 PetscErrorCode ierr; 875 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 876 PetscInt nz,*rout,*cout,*diag = a->diag; 877 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 878 879 PetscFunctionBegin; 880 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 881 882 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 883 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 884 tmp = a->solve_work; 885 886 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 887 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 888 889 /* copy the b into temp work space according to permutation */ 890 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 891 892 /* forward solve the U^T */ 893 for (i=0; i<n; i++) { 894 v = aa + diag[i] ; 895 vi = aj + diag[i] + 1; 896 nz = ai[i+1] - diag[i] - 1; 897 tmp[i] *= *v++; 898 while (nz--) { 899 tmp[*vi++ ] -= (*v++)*tmp[i]; 900 } 901 } 902 903 /* backward solve the L^T */ 904 for (i=n-1; i>=0; i--){ 905 v = aa + diag[i] - 1 ; 906 vi = aj + diag[i] - 1 ; 907 nz = diag[i] - ai[i]; 908 while (nz--) { 909 tmp[*vi-- ] -= (*v--)*tmp[i]; 910 } 911 } 912 913 /* copy tmp into x according to permutation */ 914 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 915 916 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 917 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 918 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 919 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 920 921 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 /* ----------------------------------------------------------------*/ 925 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 929 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 930 { 931 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 932 IS isicol; 933 PetscErrorCode ierr; 934 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j; 935 PetscInt *bi,*cols,nnz,*cols_lvl; 936 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 937 PetscInt i,levels,diagonal_fill; 938 PetscTruth col_identity,row_identity; 939 PetscReal f; 940 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 941 PetscBT lnkbt; 942 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 943 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 944 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 945 946 PetscFunctionBegin; 947 f = info->fill; 948 levels = (PetscInt)info->levels; 949 diagonal_fill = (PetscInt)info->diagonal_fill; 950 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 951 952 /* special case that simply copies fill pattern */ 953 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 954 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 955 if (!levels && row_identity && col_identity) { 956 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 957 (*fact)->factor = FACTOR_LU; 958 (*fact)->info.factor_mallocs = 0; 959 (*fact)->info.fill_ratio_given = info->fill; 960 (*fact)->info.fill_ratio_needed = 1.0; 961 b = (Mat_SeqAIJ*)(*fact)->data; 962 if (!b->diag) { 963 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 964 } 965 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 966 b->row = isrow; 967 b->col = iscol; 968 b->icol = isicol; 969 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 970 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 971 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 972 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 977 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 978 979 /* get new row pointers */ 980 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 981 bi[0] = 0; 982 /* bdiag is location of diagonal in factor */ 983 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 984 bdiag[0] = 0; 985 986 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 987 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 988 989 /* create a linked list for storing column indices of the active row */ 990 nlnk = n + 1; 991 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 992 993 /* initial FreeSpace size is f*(ai[n]+1) */ 994 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 995 current_space = free_space; 996 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 997 current_space_lvl = free_space_lvl; 998 999 for (i=0; i<n; i++) { 1000 nzi = 0; 1001 /* copy current row into linked list */ 1002 nnz = ai[r[i]+1] - ai[r[i]]; 1003 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1004 cols = aj + ai[r[i]]; 1005 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1006 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1007 nzi += nlnk; 1008 1009 /* make sure diagonal entry is included */ 1010 if (diagonal_fill && lnk[i] == -1) { 1011 fm = n; 1012 while (lnk[fm] < i) fm = lnk[fm]; 1013 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1014 lnk[fm] = i; 1015 lnk_lvl[i] = 0; 1016 nzi++; dcount++; 1017 } 1018 1019 /* add pivot rows into the active row */ 1020 nzbd = 0; 1021 prow = lnk[n]; 1022 while (prow < i) { 1023 nnz = bdiag[prow]; 1024 cols = bj_ptr[prow] + nnz + 1; 1025 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1026 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1027 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1028 nzi += nlnk; 1029 prow = lnk[prow]; 1030 nzbd++; 1031 } 1032 bdiag[i] = nzbd; 1033 bi[i+1] = bi[i] + nzi; 1034 1035 /* if free space is not available, make more free space */ 1036 if (current_space->local_remaining<nzi) { 1037 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1038 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1039 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1040 reallocs++; 1041 } 1042 1043 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1044 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1045 bj_ptr[i] = current_space->array; 1046 bjlvl_ptr[i] = current_space_lvl->array; 1047 1048 /* make sure the active row i has diagonal entry */ 1049 if (*(bj_ptr[i]+bdiag[i]) != i) { 1050 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1051 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1052 } 1053 1054 current_space->array += nzi; 1055 current_space->local_used += nzi; 1056 current_space->local_remaining -= nzi; 1057 current_space_lvl->array += nzi; 1058 current_space_lvl->local_used += nzi; 1059 current_space_lvl->local_remaining -= nzi; 1060 } 1061 1062 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1063 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1064 1065 /* destroy list of free space and other temporary arrays */ 1066 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1067 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1068 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1069 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1070 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1071 1072 #if defined(PETSC_USE_INFO) 1073 { 1074 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1075 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1076 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1077 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1078 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1079 if (diagonal_fill) { 1080 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1081 } 1082 } 1083 #endif 1084 1085 /* put together the new matrix */ 1086 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1087 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1088 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1089 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1090 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1091 b = (Mat_SeqAIJ*)(*fact)->data; 1092 b->freedata = PETSC_TRUE; 1093 b->singlemalloc = PETSC_FALSE; 1094 len = (bi[n] )*sizeof(PetscScalar); 1095 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1096 b->j = bj; 1097 b->i = bi; 1098 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1099 b->diag = bdiag; 1100 b->ilen = 0; 1101 b->imax = 0; 1102 b->row = isrow; 1103 b->col = iscol; 1104 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1105 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1106 b->icol = isicol; 1107 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1108 /* In b structure: Free imax, ilen, old a, old j. 1109 Allocate bdiag, solve_work, new a, new j */ 1110 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1111 b->maxnz = b->nz = bi[n] ; 1112 (*fact)->factor = FACTOR_LU; 1113 (*fact)->info.factor_mallocs = reallocs; 1114 (*fact)->info.fill_ratio_given = f; 1115 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1116 1117 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1118 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1119 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #include "src/mat/impls/sbaij/seq/sbaij.h" 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1126 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1127 { 1128 Mat C = *B; 1129 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1130 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1131 IS ip=b->row; 1132 PetscErrorCode ierr; 1133 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1134 PetscInt *ai=a->i,*aj=a->j; 1135 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1136 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1137 PetscReal zeropivot,rs,shiftnz; 1138 PetscReal shiftpd; 1139 ChShift_Ctx sctx; 1140 PetscInt newshift; 1141 1142 PetscFunctionBegin; 1143 shiftnz = info->shiftnz; 1144 shiftpd = info->shiftpd; 1145 zeropivot = info->zeropivot; 1146 1147 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1148 1149 /* initialization */ 1150 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1151 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1152 jl = il + mbs; 1153 rtmp = (MatScalar*)(jl + mbs); 1154 1155 sctx.shift_amount = 0; 1156 sctx.nshift = 0; 1157 do { 1158 sctx.chshift = PETSC_FALSE; 1159 for (i=0; i<mbs; i++) { 1160 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1161 } 1162 1163 for (k = 0; k<mbs; k++){ 1164 bval = ba + bi[k]; 1165 /* initialize k-th row by the perm[k]-th row of A */ 1166 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1167 for (j = jmin; j < jmax; j++){ 1168 col = rip[aj[j]]; 1169 if (col >= k){ /* only take upper triangular entry */ 1170 rtmp[col] = aa[j]; 1171 *bval++ = 0.0; /* for in-place factorization */ 1172 } 1173 } 1174 /* shift the diagonal of the matrix */ 1175 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1176 1177 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1178 dk = rtmp[k]; 1179 i = jl[k]; /* first row to be added to k_th row */ 1180 1181 while (i < k){ 1182 nexti = jl[i]; /* next row to be added to k_th row */ 1183 1184 /* compute multiplier, update diag(k) and U(i,k) */ 1185 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1186 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1187 dk += uikdi*ba[ili]; 1188 ba[ili] = uikdi; /* -U(i,k) */ 1189 1190 /* add multiple of row i to k-th row */ 1191 jmin = ili + 1; jmax = bi[i+1]; 1192 if (jmin < jmax){ 1193 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1194 /* update il and jl for row i */ 1195 il[i] = jmin; 1196 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1197 } 1198 i = nexti; 1199 } 1200 1201 /* shift the diagonals when zero pivot is detected */ 1202 /* compute rs=sum of abs(off-diagonal) */ 1203 rs = 0.0; 1204 jmin = bi[k]+1; 1205 nz = bi[k+1] - jmin; 1206 if (nz){ 1207 bcol = bj + jmin; 1208 while (nz--){ 1209 rs += PetscAbsScalar(rtmp[*bcol]); 1210 bcol++; 1211 } 1212 } 1213 1214 sctx.rs = rs; 1215 sctx.pv = dk; 1216 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1217 if (newshift == 1) break; 1218 1219 /* copy data into U(k,:) */ 1220 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1221 jmin = bi[k]+1; jmax = bi[k+1]; 1222 if (jmin < jmax) { 1223 for (j=jmin; j<jmax; j++){ 1224 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1225 } 1226 /* add the k-th row into il and jl */ 1227 il[k] = jmin; 1228 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1229 } 1230 } 1231 } while (sctx.chshift); 1232 ierr = PetscFree(il);CHKERRQ(ierr); 1233 1234 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1235 C->factor = FACTOR_CHOLESKY; 1236 C->assembled = PETSC_TRUE; 1237 C->preallocated = PETSC_TRUE; 1238 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1239 if (sctx.nshift){ 1240 if (shiftnz) { 1241 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1242 } else if (shiftpd) { 1243 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1244 } 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1251 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1252 { 1253 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1254 Mat_SeqSBAIJ *b; 1255 Mat B; 1256 PetscErrorCode ierr; 1257 PetscTruth perm_identity; 1258 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1259 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1260 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1261 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1262 PetscReal fill=info->fill,levels=info->levels; 1263 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1264 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1265 PetscBT lnkbt; 1266 1267 PetscFunctionBegin; 1268 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1269 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1270 1271 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1272 ui[0] = 0; 1273 1274 /* special case that simply copies fill pattern */ 1275 if (!levels && perm_identity) { 1276 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1277 for (i=0; i<am; i++) { 1278 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1279 } 1280 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1281 cols = uj; 1282 for (i=0; i<am; i++) { 1283 aj = a->j + a->diag[i]; 1284 ncols = ui[i+1] - ui[i]; 1285 for (j=0; j<ncols; j++) *cols++ = *aj++; 1286 } 1287 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1288 /* initialization */ 1289 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1290 1291 /* jl: linked list for storing indices of the pivot rows 1292 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1293 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1294 il = jl + am; 1295 uj_ptr = (PetscInt**)(il + am); 1296 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1297 for (i=0; i<am; i++){ 1298 jl[i] = am; il[i] = 0; 1299 } 1300 1301 /* create and initialize a linked list for storing column indices of the active row k */ 1302 nlnk = am + 1; 1303 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1304 1305 /* initial FreeSpace size is fill*(ai[am]+1) */ 1306 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1307 current_space = free_space; 1308 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1309 current_space_lvl = free_space_lvl; 1310 1311 for (k=0; k<am; k++){ /* for each active row k */ 1312 /* initialize lnk by the column indices of row rip[k] of A */ 1313 nzk = 0; 1314 ncols = ai[rip[k]+1] - ai[rip[k]]; 1315 ncols_upper = 0; 1316 for (j=0; j<ncols; j++){ 1317 i = *(aj + ai[rip[k]] + j); 1318 if (rip[i] >= k){ /* only take upper triangular entry */ 1319 ajtmp[ncols_upper] = i; 1320 ncols_upper++; 1321 } 1322 } 1323 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1324 nzk += nlnk; 1325 1326 /* update lnk by computing fill-in for each pivot row to be merged in */ 1327 prow = jl[k]; /* 1st pivot row */ 1328 1329 while (prow < k){ 1330 nextprow = jl[prow]; 1331 1332 /* merge prow into k-th row */ 1333 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1334 jmax = ui[prow+1]; 1335 ncols = jmax-jmin; 1336 i = jmin - ui[prow]; 1337 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1338 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1339 j = *(uj - 1); 1340 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1341 nzk += nlnk; 1342 1343 /* update il and jl for prow */ 1344 if (jmin < jmax){ 1345 il[prow] = jmin; 1346 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1347 } 1348 prow = nextprow; 1349 } 1350 1351 /* if free space is not available, make more free space */ 1352 if (current_space->local_remaining<nzk) { 1353 i = am - k + 1; /* num of unfactored rows */ 1354 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1355 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1356 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1357 reallocs++; 1358 } 1359 1360 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1361 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1362 1363 /* add the k-th row into il and jl */ 1364 if (nzk > 1){ 1365 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1366 jl[k] = jl[i]; jl[i] = k; 1367 il[k] = ui[k] + 1; 1368 } 1369 uj_ptr[k] = current_space->array; 1370 uj_lvl_ptr[k] = current_space_lvl->array; 1371 1372 current_space->array += nzk; 1373 current_space->local_used += nzk; 1374 current_space->local_remaining -= nzk; 1375 1376 current_space_lvl->array += nzk; 1377 current_space_lvl->local_used += nzk; 1378 current_space_lvl->local_remaining -= nzk; 1379 1380 ui[k+1] = ui[k] + nzk; 1381 } 1382 1383 #if defined(PETSC_USE_INFO) 1384 if (ai[am] != 0) { 1385 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1386 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1387 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1388 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1389 } else { 1390 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1391 } 1392 #endif 1393 1394 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1395 ierr = PetscFree(jl);CHKERRQ(ierr); 1396 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1397 1398 /* destroy list of free space and other temporary array(s) */ 1399 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1400 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1401 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1402 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1403 1404 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1405 1406 /* put together the new matrix in MATSEQSBAIJ format */ 1407 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1408 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1409 B = *fact; 1410 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1411 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1412 1413 b = (Mat_SeqSBAIJ*)B->data; 1414 b->singlemalloc = PETSC_FALSE; 1415 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1416 b->j = uj; 1417 b->i = ui; 1418 b->diag = 0; 1419 b->ilen = 0; 1420 b->imax = 0; 1421 b->row = perm; 1422 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1423 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1424 b->icol = perm; 1425 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1426 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1427 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1428 b->maxnz = b->nz = ui[am]; 1429 b->freedata = PETSC_TRUE; 1430 1431 B->factor = FACTOR_CHOLESKY; 1432 B->info.factor_mallocs = reallocs; 1433 B->info.fill_ratio_given = fill; 1434 if (ai[am] != 0) { 1435 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1436 } else { 1437 B->info.fill_ratio_needed = 0.0; 1438 } 1439 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1440 if (perm_identity){ 1441 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1442 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1443 } 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1449 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1450 { 1451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1452 Mat_SeqSBAIJ *b; 1453 Mat B; 1454 PetscErrorCode ierr; 1455 PetscTruth perm_identity; 1456 PetscReal fill = info->fill; 1457 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1458 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1459 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1460 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1461 PetscBT lnkbt; 1462 IS iperm; 1463 1464 PetscFunctionBegin; 1465 /* check whether perm is the identity mapping */ 1466 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1467 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1468 1469 if (!perm_identity){ 1470 /* check if perm is symmetric! */ 1471 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1472 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1473 for (i=0; i<am; i++) { 1474 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1475 } 1476 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1477 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1478 } 1479 1480 /* initialization */ 1481 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1482 ui[0] = 0; 1483 1484 /* jl: linked list for storing indices of the pivot rows 1485 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1486 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1487 il = jl + am; 1488 cols = il + am; 1489 ui_ptr = (PetscInt**)(cols + am); 1490 for (i=0; i<am; i++){ 1491 jl[i] = am; il[i] = 0; 1492 } 1493 1494 /* create and initialize a linked list for storing column indices of the active row k */ 1495 nlnk = am + 1; 1496 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1497 1498 /* initial FreeSpace size is fill*(ai[am]+1) */ 1499 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1500 current_space = free_space; 1501 1502 for (k=0; k<am; k++){ /* for each active row k */ 1503 /* initialize lnk by the column indices of row rip[k] of A */ 1504 nzk = 0; 1505 ncols = ai[rip[k]+1] - ai[rip[k]]; 1506 ncols_upper = 0; 1507 for (j=0; j<ncols; j++){ 1508 i = rip[*(aj + ai[rip[k]] + j)]; 1509 if (i >= k){ /* only take upper triangular entry */ 1510 cols[ncols_upper] = i; 1511 ncols_upper++; 1512 } 1513 } 1514 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1515 nzk += nlnk; 1516 1517 /* update lnk by computing fill-in for each pivot row to be merged in */ 1518 prow = jl[k]; /* 1st pivot row */ 1519 1520 while (prow < k){ 1521 nextprow = jl[prow]; 1522 /* merge prow into k-th row */ 1523 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1524 jmax = ui[prow+1]; 1525 ncols = jmax-jmin; 1526 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1527 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1528 nzk += nlnk; 1529 1530 /* update il and jl for prow */ 1531 if (jmin < jmax){ 1532 il[prow] = jmin; 1533 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1534 } 1535 prow = nextprow; 1536 } 1537 1538 /* if free space is not available, make more free space */ 1539 if (current_space->local_remaining<nzk) { 1540 i = am - k + 1; /* num of unfactored rows */ 1541 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1542 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1543 reallocs++; 1544 } 1545 1546 /* copy data into free space, then initialize lnk */ 1547 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1548 1549 /* add the k-th row into il and jl */ 1550 if (nzk-1 > 0){ 1551 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1552 jl[k] = jl[i]; jl[i] = k; 1553 il[k] = ui[k] + 1; 1554 } 1555 ui_ptr[k] = current_space->array; 1556 current_space->array += nzk; 1557 current_space->local_used += nzk; 1558 current_space->local_remaining -= nzk; 1559 1560 ui[k+1] = ui[k] + nzk; 1561 } 1562 1563 #if defined(PETSC_USE_INFO) 1564 if (ai[am] != 0) { 1565 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1566 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1567 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1568 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1569 } else { 1570 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1571 } 1572 #endif 1573 1574 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1575 ierr = PetscFree(jl);CHKERRQ(ierr); 1576 1577 /* destroy list of free space and other temporary array(s) */ 1578 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1579 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1580 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1581 1582 /* put together the new matrix in MATSEQSBAIJ format */ 1583 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1584 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1585 B = *fact; 1586 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1587 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1588 1589 b = (Mat_SeqSBAIJ*)B->data; 1590 b->singlemalloc = PETSC_FALSE; 1591 b->freedata = PETSC_TRUE; 1592 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1593 b->j = uj; 1594 b->i = ui; 1595 b->diag = 0; 1596 b->ilen = 0; 1597 b->imax = 0; 1598 b->row = perm; 1599 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1600 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1601 b->icol = perm; 1602 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1603 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1604 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1605 b->maxnz = b->nz = ui[am]; 1606 1607 B->factor = FACTOR_CHOLESKY; 1608 B->info.factor_mallocs = reallocs; 1609 B->info.fill_ratio_given = fill; 1610 if (ai[am] != 0) { 1611 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1612 } else { 1613 B->info.fill_ratio_needed = 0.0; 1614 } 1615 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1616 if (perm_identity){ 1617 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1618 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1619 } 1620 PetscFunctionReturn(0); 1621 } 1622