1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->rmap.n; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 252 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 253 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 254 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 310 nzi += nlnk; 311 312 /* add pivot rows into linked list */ 313 row = lnk[n]; 314 while (row < i) { 315 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 316 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 317 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 318 nzi += nlnk; 319 row = lnk[row]; 320 } 321 bi[i+1] = bi[i] + nzi; 322 im[i] = nzi; 323 324 /* mark bdiag */ 325 nzbd = 0; 326 nnz = nzi; 327 k = lnk[n]; 328 while (nnz-- && k < i){ 329 nzbd++; 330 k = lnk[k]; 331 } 332 bdiag[i] = bi[i] + nzbd; 333 334 /* if free space is not available, make more free space */ 335 if (current_space->local_remaining<nzi) { 336 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 337 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 338 reallocs++; 339 } 340 341 /* copy data into free space, then initialize lnk */ 342 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 343 bi_ptr[i] = current_space->array; 344 current_space->array += nzi; 345 current_space->local_used += nzi; 346 current_space->local_remaining -= nzi; 347 } 348 #if defined(PETSC_USE_INFO) 349 if (ai[n] != 0) { 350 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 351 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 352 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 353 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 354 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 355 } else { 356 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 357 } 358 #endif 359 360 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 361 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 362 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 365 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 366 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 367 ierr = PetscFree(im);CHKERRQ(ierr); 368 369 /* put together the new matrix */ 370 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 371 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 372 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 373 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 374 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 375 b = (Mat_SeqAIJ*)(*B)->data; 376 b->freedata = PETSC_TRUE; 377 b->singlemalloc = PETSC_FALSE; 378 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 379 b->j = bj; 380 b->i = bi; 381 b->diag = bdiag; 382 b->ilen = 0; 383 b->imax = 0; 384 b->row = isrow; 385 b->col = iscol; 386 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 387 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 388 b->icol = isicol; 389 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 390 391 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 392 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 393 b->maxnz = b->nz = bi[n] ; 394 395 (*B)->factor = FACTOR_LU; 396 (*B)->info.factor_mallocs = reallocs; 397 (*B)->info.fill_ratio_given = f; 398 399 if (ai[n] != 0) { 400 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 401 } else { 402 (*B)->info.fill_ratio_needed = 0.0; 403 } 404 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 405 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 406 PetscFunctionReturn(0); 407 } 408 409 /* ----------------------------------------------------------- */ 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 412 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 413 { 414 Mat C=*B; 415 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 416 IS isrow = b->row,isicol = b->icol; 417 PetscErrorCode ierr; 418 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 419 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 420 PetscInt *diag_offset = b->diag,diag,*pj; 421 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 422 PetscScalar d; 423 PetscReal rs; 424 LUShift_Ctx sctx; 425 PetscInt newshift; 426 427 PetscFunctionBegin; 428 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 429 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 430 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 431 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 432 rtmps = rtmp; ics = ic; 433 434 sctx.shift_top = 0; 435 sctx.nshift_max = 0; 436 sctx.shift_lo = 0; 437 sctx.shift_hi = 0; 438 439 if (!a->diag) { 440 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 441 } 442 /* if both shift schemes are chosen by user, only use info->shiftpd */ 443 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 444 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 445 PetscInt *aai = a->i,*ddiag = a->diag; 446 sctx.shift_top = 0; 447 for (i=0; i<n; i++) { 448 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 449 d = (a->a)[ddiag[i]]; 450 rs = -PetscAbsScalar(d) - PetscRealPart(d); 451 v = a->a+aai[i]; 452 nz = aai[i+1] - aai[i]; 453 for (j=0; j<nz; j++) 454 rs += PetscAbsScalar(v[j]); 455 if (rs>sctx.shift_top) sctx.shift_top = rs; 456 } 457 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 458 sctx.shift_top *= 1.1; 459 sctx.nshift_max = 5; 460 sctx.shift_lo = 0.; 461 sctx.shift_hi = 1.; 462 } 463 464 sctx.shift_amount = 0; 465 sctx.nshift = 0; 466 do { 467 sctx.lushift = PETSC_FALSE; 468 for (i=0; i<n; i++){ 469 nz = bi[i+1] - bi[i]; 470 bjtmp = bj + bi[i]; 471 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 472 473 /* load in initial (unfactored row) */ 474 nz = a->i[r[i]+1] - a->i[r[i]]; 475 ajtmp = a->j + a->i[r[i]]; 476 v = a->a + a->i[r[i]]; 477 for (j=0; j<nz; j++) { 478 rtmp[ics[ajtmp[j]]] = v[j]; 479 } 480 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 481 482 row = *bjtmp++; 483 while (row < i) { 484 pc = rtmp + row; 485 if (*pc != 0.0) { 486 pv = b->a + diag_offset[row]; 487 pj = b->j + diag_offset[row] + 1; 488 multiplier = *pc / *pv++; 489 *pc = multiplier; 490 nz = bi[row+1] - diag_offset[row] - 1; 491 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 492 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 493 } 494 row = *bjtmp++; 495 } 496 /* finished row so stick it into b->a */ 497 pv = b->a + bi[i] ; 498 pj = b->j + bi[i] ; 499 nz = bi[i+1] - bi[i]; 500 diag = diag_offset[i] - bi[i]; 501 rs = 0.0; 502 for (j=0; j<nz; j++) { 503 pv[j] = rtmps[pj[j]]; 504 if (j != diag) rs += PetscAbsScalar(pv[j]); 505 } 506 507 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 508 sctx.rs = rs; 509 sctx.pv = pv[diag]; 510 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 511 if (newshift == 1){ 512 break; /* sctx.shift_amount is updated */ 513 } else if (newshift == -1){ 514 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 515 } 516 } 517 518 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 519 /* 520 * if no shift in this attempt & shifting & started shifting & can refine, 521 * then try lower shift 522 */ 523 sctx.shift_hi = info->shift_fraction; 524 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 525 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 526 sctx.lushift = PETSC_TRUE; 527 sctx.nshift++; 528 } 529 } while (sctx.lushift); 530 531 /* invert diagonal entries for simplier triangular solves */ 532 for (i=0; i<n; i++) { 533 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 534 } 535 536 ierr = PetscFree(rtmp);CHKERRQ(ierr); 537 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 538 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 539 C->factor = FACTOR_LU; 540 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 541 C->assembled = PETSC_TRUE; 542 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 543 if (sctx.nshift){ 544 if (info->shiftnz) { 545 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 546 } else if (info->shiftpd) { 547 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 548 } 549 } 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 555 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 556 { 557 PetscFunctionBegin; 558 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 559 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 560 PetscFunctionReturn(0); 561 } 562 563 564 /* ----------------------------------------------------------- */ 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatLUFactor_SeqAIJ" 567 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 568 { 569 PetscErrorCode ierr; 570 Mat C; 571 572 PetscFunctionBegin; 573 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 574 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 575 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 576 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 /* ----------------------------------------------------------- */ 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatSolve_SeqAIJ" 582 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 583 { 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 585 IS iscol = a->col,isrow = a->row; 586 PetscErrorCode ierr; 587 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 588 PetscInt nz,*rout,*cout; 589 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 590 591 PetscFunctionBegin; 592 if (!n) PetscFunctionReturn(0); 593 594 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 595 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 596 tmp = a->solve_work; 597 598 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 599 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 600 601 /* forward solve the lower triangular */ 602 tmp[0] = b[*r++]; 603 tmps = tmp; 604 for (i=1; i<n; i++) { 605 v = aa + ai[i] ; 606 vi = aj + ai[i] ; 607 nz = a->diag[i] - ai[i]; 608 sum = b[*r++]; 609 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 610 tmp[i] = sum; 611 } 612 613 /* backward solve the upper triangular */ 614 for (i=n-1; i>=0; i--){ 615 v = aa + a->diag[i] + 1; 616 vi = aj + a->diag[i] + 1; 617 nz = ai[i+1] - a->diag[i] - 1; 618 sum = tmp[i]; 619 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 620 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 621 } 622 623 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 624 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 625 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 627 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatMatSolve_SeqAIJ" 633 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 634 { 635 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 636 IS iscol = a->col,isrow = a->row; 637 PetscErrorCode ierr; 638 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 639 PetscInt nz,*rout,*cout,neq; 640 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 641 642 PetscFunctionBegin; 643 if (!n) PetscFunctionReturn(0); 644 645 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 646 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 647 648 tmp = a->solve_work; 649 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 650 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 651 652 for (neq=0; neq<n; neq++){ 653 /* forward solve the lower triangular */ 654 tmp[0] = b[r[0]]; 655 tmps = tmp; 656 for (i=1; i<n; i++) { 657 v = aa + ai[i] ; 658 vi = aj + ai[i] ; 659 nz = a->diag[i] - ai[i]; 660 sum = b[r[i]]; 661 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 662 tmp[i] = sum; 663 } 664 /* backward solve the upper triangular */ 665 for (i=n-1; i>=0; i--){ 666 v = aa + a->diag[i] + 1; 667 vi = aj + a->diag[i] + 1; 668 nz = ai[i+1] - a->diag[i] - 1; 669 sum = tmp[i]; 670 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 671 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 672 } 673 674 b += n; 675 x += n; 676 } 677 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 678 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 679 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 680 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 681 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 /* ----------------------------------------------------------- */ 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 688 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 689 { 690 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 691 PetscErrorCode ierr; 692 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 693 PetscScalar *x,*b,*aa = a->a; 694 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 695 PetscInt adiag_i,i,*vi,nz,ai_i; 696 PetscScalar *v,sum; 697 #endif 698 699 PetscFunctionBegin; 700 if (!n) PetscFunctionReturn(0); 701 702 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 704 705 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 706 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 707 #else 708 /* forward solve the lower triangular */ 709 x[0] = b[0]; 710 for (i=1; i<n; i++) { 711 ai_i = ai[i]; 712 v = aa + ai_i; 713 vi = aj + ai_i; 714 nz = adiag[i] - ai_i; 715 sum = b[i]; 716 while (nz--) sum -= *v++ * x[*vi++]; 717 x[i] = sum; 718 } 719 720 /* backward solve the upper triangular */ 721 for (i=n-1; i>=0; i--){ 722 adiag_i = adiag[i]; 723 v = aa + adiag_i + 1; 724 vi = aj + adiag_i + 1; 725 nz = ai[i+1] - adiag_i - 1; 726 sum = x[i]; 727 while (nz--) sum -= *v++ * x[*vi++]; 728 x[i] = sum*aa[adiag_i]; 729 } 730 #endif 731 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 732 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 733 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 739 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 740 { 741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 742 IS iscol = a->col,isrow = a->row; 743 PetscErrorCode ierr; 744 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 745 PetscInt nz,*rout,*cout; 746 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 747 748 PetscFunctionBegin; 749 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 750 751 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 752 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 753 tmp = a->solve_work; 754 755 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 756 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 757 758 /* forward solve the lower triangular */ 759 tmp[0] = b[*r++]; 760 for (i=1; i<n; i++) { 761 v = aa + ai[i] ; 762 vi = aj + ai[i] ; 763 nz = a->diag[i] - ai[i]; 764 sum = b[*r++]; 765 while (nz--) sum -= *v++ * tmp[*vi++ ]; 766 tmp[i] = sum; 767 } 768 769 /* backward solve the upper triangular */ 770 for (i=n-1; i>=0; i--){ 771 v = aa + a->diag[i] + 1; 772 vi = aj + a->diag[i] + 1; 773 nz = ai[i+1] - a->diag[i] - 1; 774 sum = tmp[i]; 775 while (nz--) sum -= *v++ * tmp[*vi++ ]; 776 tmp[i] = sum*aa[a->diag[i]]; 777 x[*c--] += tmp[i]; 778 } 779 780 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 781 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 782 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 783 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 784 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 785 786 PetscFunctionReturn(0); 787 } 788 /* -------------------------------------------------------------------*/ 789 #undef __FUNCT__ 790 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 791 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 792 { 793 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 794 IS iscol = a->col,isrow = a->row; 795 PetscErrorCode ierr; 796 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 797 PetscInt nz,*rout,*cout,*diag = a->diag; 798 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 799 800 PetscFunctionBegin; 801 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 802 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 803 tmp = a->solve_work; 804 805 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 806 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 807 808 /* copy the b into temp work space according to permutation */ 809 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 810 811 /* forward solve the U^T */ 812 for (i=0; i<n; i++) { 813 v = aa + diag[i] ; 814 vi = aj + diag[i] + 1; 815 nz = ai[i+1] - diag[i] - 1; 816 s1 = tmp[i]; 817 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 818 while (nz--) { 819 tmp[*vi++ ] -= (*v++)*s1; 820 } 821 tmp[i] = s1; 822 } 823 824 /* backward solve the L^T */ 825 for (i=n-1; i>=0; i--){ 826 v = aa + diag[i] - 1 ; 827 vi = aj + diag[i] - 1 ; 828 nz = diag[i] - ai[i]; 829 s1 = tmp[i]; 830 while (nz--) { 831 tmp[*vi-- ] -= (*v--)*s1; 832 } 833 } 834 835 /* copy tmp into x according to permutation */ 836 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 837 838 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 839 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 840 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 841 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 842 843 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 849 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 850 { 851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 852 IS iscol = a->col,isrow = a->row; 853 PetscErrorCode ierr; 854 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 855 PetscInt nz,*rout,*cout,*diag = a->diag; 856 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 857 858 PetscFunctionBegin; 859 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 860 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 863 tmp = a->solve_work; 864 865 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 866 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 867 868 /* copy the b into temp work space according to permutation */ 869 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 870 871 /* forward solve the U^T */ 872 for (i=0; i<n; i++) { 873 v = aa + diag[i] ; 874 vi = aj + diag[i] + 1; 875 nz = ai[i+1] - diag[i] - 1; 876 tmp[i] *= *v++; 877 while (nz--) { 878 tmp[*vi++ ] -= (*v++)*tmp[i]; 879 } 880 } 881 882 /* backward solve the L^T */ 883 for (i=n-1; i>=0; i--){ 884 v = aa + diag[i] - 1 ; 885 vi = aj + diag[i] - 1 ; 886 nz = diag[i] - ai[i]; 887 while (nz--) { 888 tmp[*vi-- ] -= (*v--)*tmp[i]; 889 } 890 } 891 892 /* copy tmp into x according to permutation */ 893 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 894 895 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 896 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 897 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 898 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 899 900 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 /* ----------------------------------------------------------------*/ 904 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 905 906 #undef __FUNCT__ 907 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 908 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 909 { 910 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 911 IS isicol; 912 PetscErrorCode ierr; 913 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j; 914 PetscInt *bi,*cols,nnz,*cols_lvl; 915 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 916 PetscInt i,levels,diagonal_fill; 917 PetscTruth col_identity,row_identity; 918 PetscReal f; 919 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 920 PetscBT lnkbt; 921 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 922 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 923 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 924 925 PetscFunctionBegin; 926 f = info->fill; 927 levels = (PetscInt)info->levels; 928 diagonal_fill = (PetscInt)info->diagonal_fill; 929 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 930 931 /* special case that simply copies fill pattern */ 932 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 933 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 934 if (!levels && row_identity && col_identity) { 935 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 936 (*fact)->factor = FACTOR_LU; 937 b = (Mat_SeqAIJ*)(*fact)->data; 938 if (!b->diag) { 939 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 940 } 941 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 942 b->row = isrow; 943 b->col = iscol; 944 b->icol = isicol; 945 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 946 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 947 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 948 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 953 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 954 955 /* get new row pointers */ 956 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 957 bi[0] = 0; 958 /* bdiag is location of diagonal in factor */ 959 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 960 bdiag[0] = 0; 961 962 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 963 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 964 965 /* create a linked list for storing column indices of the active row */ 966 nlnk = n + 1; 967 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 968 969 /* initial FreeSpace size is f*(ai[n]+1) */ 970 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 971 current_space = free_space; 972 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 973 current_space_lvl = free_space_lvl; 974 975 for (i=0; i<n; i++) { 976 nzi = 0; 977 /* copy current row into linked list */ 978 nnz = ai[r[i]+1] - ai[r[i]]; 979 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 980 cols = aj + ai[r[i]]; 981 lnk[i] = -1; /* marker to indicate if diagonal exists */ 982 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 983 nzi += nlnk; 984 985 /* make sure diagonal entry is included */ 986 if (diagonal_fill && lnk[i] == -1) { 987 fm = n; 988 while (lnk[fm] < i) fm = lnk[fm]; 989 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 990 lnk[fm] = i; 991 lnk_lvl[i] = 0; 992 nzi++; dcount++; 993 } 994 995 /* add pivot rows into the active row */ 996 nzbd = 0; 997 prow = lnk[n]; 998 while (prow < i) { 999 nnz = bdiag[prow]; 1000 cols = bj_ptr[prow] + nnz + 1; 1001 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1002 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1003 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1004 nzi += nlnk; 1005 prow = lnk[prow]; 1006 nzbd++; 1007 } 1008 bdiag[i] = nzbd; 1009 bi[i+1] = bi[i] + nzi; 1010 1011 /* if free space is not available, make more free space */ 1012 if (current_space->local_remaining<nzi) { 1013 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1014 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1015 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1016 reallocs++; 1017 } 1018 1019 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1020 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1021 bj_ptr[i] = current_space->array; 1022 bjlvl_ptr[i] = current_space_lvl->array; 1023 1024 /* make sure the active row i has diagonal entry */ 1025 if (*(bj_ptr[i]+bdiag[i]) != i) { 1026 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1027 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 1028 } 1029 1030 current_space->array += nzi; 1031 current_space->local_used += nzi; 1032 current_space->local_remaining -= nzi; 1033 current_space_lvl->array += nzi; 1034 current_space_lvl->local_used += nzi; 1035 current_space_lvl->local_remaining -= nzi; 1036 } 1037 1038 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1039 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1040 1041 /* destroy list of free space and other temporary arrays */ 1042 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1043 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1044 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1045 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1046 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1047 1048 #if defined(PETSC_USE_INFO) 1049 { 1050 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1051 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1052 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1053 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1054 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1055 if (diagonal_fill) { 1056 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1057 } 1058 } 1059 #endif 1060 1061 /* put together the new matrix */ 1062 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1063 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1064 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1065 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1066 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1067 b = (Mat_SeqAIJ*)(*fact)->data; 1068 b->freedata = PETSC_TRUE; 1069 b->singlemalloc = PETSC_FALSE; 1070 len = (bi[n] )*sizeof(PetscScalar); 1071 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1072 b->j = bj; 1073 b->i = bi; 1074 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1075 b->diag = bdiag; 1076 b->ilen = 0; 1077 b->imax = 0; 1078 b->row = isrow; 1079 b->col = iscol; 1080 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1081 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1082 b->icol = isicol; 1083 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1084 /* In b structure: Free imax, ilen, old a, old j. 1085 Allocate bdiag, solve_work, new a, new j */ 1086 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1087 b->maxnz = b->nz = bi[n] ; 1088 (*fact)->factor = FACTOR_LU; 1089 (*fact)->info.factor_mallocs = reallocs; 1090 (*fact)->info.fill_ratio_given = f; 1091 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1092 1093 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1094 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1095 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #include "src/mat/impls/sbaij/seq/sbaij.h" 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1102 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1103 { 1104 Mat C = *B; 1105 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1106 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1107 IS ip=b->row; 1108 PetscErrorCode ierr; 1109 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1110 PetscInt *ai=a->i,*aj=a->j; 1111 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1112 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1113 PetscReal zeropivot,rs,shiftnz; 1114 PetscReal shiftpd; 1115 ChShift_Ctx sctx; 1116 PetscInt newshift; 1117 1118 PetscFunctionBegin; 1119 shiftnz = info->shiftnz; 1120 shiftpd = info->shiftpd; 1121 zeropivot = info->zeropivot; 1122 1123 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1124 1125 /* initialization */ 1126 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1127 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1128 jl = il + mbs; 1129 rtmp = (MatScalar*)(jl + mbs); 1130 1131 sctx.shift_amount = 0; 1132 sctx.nshift = 0; 1133 do { 1134 sctx.chshift = PETSC_FALSE; 1135 for (i=0; i<mbs; i++) { 1136 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1137 } 1138 1139 for (k = 0; k<mbs; k++){ 1140 bval = ba + bi[k]; 1141 /* initialize k-th row by the perm[k]-th row of A */ 1142 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1143 for (j = jmin; j < jmax; j++){ 1144 col = rip[aj[j]]; 1145 if (col >= k){ /* only take upper triangular entry */ 1146 rtmp[col] = aa[j]; 1147 *bval++ = 0.0; /* for in-place factorization */ 1148 } 1149 } 1150 /* shift the diagonal of the matrix */ 1151 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1152 1153 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1154 dk = rtmp[k]; 1155 i = jl[k]; /* first row to be added to k_th row */ 1156 1157 while (i < k){ 1158 nexti = jl[i]; /* next row to be added to k_th row */ 1159 1160 /* compute multiplier, update diag(k) and U(i,k) */ 1161 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1162 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1163 dk += uikdi*ba[ili]; 1164 ba[ili] = uikdi; /* -U(i,k) */ 1165 1166 /* add multiple of row i to k-th row */ 1167 jmin = ili + 1; jmax = bi[i+1]; 1168 if (jmin < jmax){ 1169 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1170 /* update il and jl for row i */ 1171 il[i] = jmin; 1172 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1173 } 1174 i = nexti; 1175 } 1176 1177 /* shift the diagonals when zero pivot is detected */ 1178 /* compute rs=sum of abs(off-diagonal) */ 1179 rs = 0.0; 1180 jmin = bi[k]+1; 1181 nz = bi[k+1] - jmin; 1182 if (nz){ 1183 bcol = bj + jmin; 1184 while (nz--){ 1185 rs += PetscAbsScalar(rtmp[*bcol]); 1186 bcol++; 1187 } 1188 } 1189 1190 sctx.rs = rs; 1191 sctx.pv = dk; 1192 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1193 if (newshift == 1){ 1194 break; /* sctx.shift_amount is updated */ 1195 } else if (newshift == -1){ 1196 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",k,PetscAbsScalar(dk),zeropivot,rs); 1197 } 1198 1199 /* copy data into U(k,:) */ 1200 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1201 jmin = bi[k]+1; jmax = bi[k+1]; 1202 if (jmin < jmax) { 1203 for (j=jmin; j<jmax; j++){ 1204 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1205 } 1206 /* add the k-th row into il and jl */ 1207 il[k] = jmin; 1208 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1209 } 1210 } 1211 } while (sctx.chshift); 1212 ierr = PetscFree(il);CHKERRQ(ierr); 1213 1214 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1215 C->factor = FACTOR_CHOLESKY; 1216 C->assembled = PETSC_TRUE; 1217 C->preallocated = PETSC_TRUE; 1218 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1219 if (sctx.nshift){ 1220 if (shiftnz) { 1221 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1222 } else if (shiftpd) { 1223 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1224 } 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1231 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1232 { 1233 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1234 Mat_SeqSBAIJ *b; 1235 Mat B; 1236 PetscErrorCode ierr; 1237 PetscTruth perm_identity; 1238 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1239 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1240 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1241 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1242 PetscReal fill=info->fill,levels=info->levels; 1243 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1244 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1245 PetscBT lnkbt; 1246 1247 PetscFunctionBegin; 1248 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1249 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1250 1251 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1252 ui[0] = 0; 1253 1254 /* special case that simply copies fill pattern */ 1255 if (!levels && perm_identity) { 1256 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1257 for (i=0; i<am; i++) { 1258 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1259 } 1260 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1261 cols = uj; 1262 for (i=0; i<am; i++) { 1263 aj = a->j + a->diag[i]; 1264 ncols = ui[i+1] - ui[i]; 1265 for (j=0; j<ncols; j++) *cols++ = *aj++; 1266 } 1267 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1268 /* initialization */ 1269 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1270 1271 /* jl: linked list for storing indices of the pivot rows 1272 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1273 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1274 il = jl + am; 1275 uj_ptr = (PetscInt**)(il + am); 1276 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1277 for (i=0; i<am; i++){ 1278 jl[i] = am; il[i] = 0; 1279 } 1280 1281 /* create and initialize a linked list for storing column indices of the active row k */ 1282 nlnk = am + 1; 1283 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1284 1285 /* initial FreeSpace size is fill*(ai[am]+1) */ 1286 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1287 current_space = free_space; 1288 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1289 current_space_lvl = free_space_lvl; 1290 1291 for (k=0; k<am; k++){ /* for each active row k */ 1292 /* initialize lnk by the column indices of row rip[k] of A */ 1293 nzk = 0; 1294 ncols = ai[rip[k]+1] - ai[rip[k]]; 1295 ncols_upper = 0; 1296 for (j=0; j<ncols; j++){ 1297 i = *(aj + ai[rip[k]] + j); 1298 if (rip[i] >= k){ /* only take upper triangular entry */ 1299 ajtmp[ncols_upper] = i; 1300 ncols_upper++; 1301 } 1302 } 1303 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1304 nzk += nlnk; 1305 1306 /* update lnk by computing fill-in for each pivot row to be merged in */ 1307 prow = jl[k]; /* 1st pivot row */ 1308 1309 while (prow < k){ 1310 nextprow = jl[prow]; 1311 1312 /* merge prow into k-th row */ 1313 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1314 jmax = ui[prow+1]; 1315 ncols = jmax-jmin; 1316 i = jmin - ui[prow]; 1317 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1318 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1319 j = *(uj - 1); 1320 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1321 nzk += nlnk; 1322 1323 /* update il and jl for prow */ 1324 if (jmin < jmax){ 1325 il[prow] = jmin; 1326 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1327 } 1328 prow = nextprow; 1329 } 1330 1331 /* if free space is not available, make more free space */ 1332 if (current_space->local_remaining<nzk) { 1333 i = am - k + 1; /* num of unfactored rows */ 1334 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1335 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1336 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1337 reallocs++; 1338 } 1339 1340 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1341 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1342 1343 /* add the k-th row into il and jl */ 1344 if (nzk > 1){ 1345 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1346 jl[k] = jl[i]; jl[i] = k; 1347 il[k] = ui[k] + 1; 1348 } 1349 uj_ptr[k] = current_space->array; 1350 uj_lvl_ptr[k] = current_space_lvl->array; 1351 1352 current_space->array += nzk; 1353 current_space->local_used += nzk; 1354 current_space->local_remaining -= nzk; 1355 1356 current_space_lvl->array += nzk; 1357 current_space_lvl->local_used += nzk; 1358 current_space_lvl->local_remaining -= nzk; 1359 1360 ui[k+1] = ui[k] + nzk; 1361 } 1362 1363 #if defined(PETSC_USE_INFO) 1364 if (ai[am] != 0) { 1365 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1366 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1367 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1368 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1369 } else { 1370 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1371 } 1372 #endif 1373 1374 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1375 ierr = PetscFree(jl);CHKERRQ(ierr); 1376 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1377 1378 /* destroy list of free space and other temporary array(s) */ 1379 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1380 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1381 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1382 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1383 1384 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1385 1386 /* put together the new matrix in MATSEQSBAIJ format */ 1387 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1388 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1389 B = *fact; 1390 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1391 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1392 1393 b = (Mat_SeqSBAIJ*)B->data; 1394 b->singlemalloc = PETSC_FALSE; 1395 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1396 b->j = uj; 1397 b->i = ui; 1398 b->diag = 0; 1399 b->ilen = 0; 1400 b->imax = 0; 1401 b->row = perm; 1402 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1403 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1404 b->icol = perm; 1405 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1406 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1407 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1408 b->maxnz = b->nz = ui[am]; 1409 1410 B->factor = FACTOR_CHOLESKY; 1411 B->info.factor_mallocs = reallocs; 1412 B->info.fill_ratio_given = fill; 1413 if (ai[am] != 0) { 1414 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1415 } else { 1416 B->info.fill_ratio_needed = 0.0; 1417 } 1418 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1419 if (perm_identity){ 1420 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1421 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1428 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1429 { 1430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1431 Mat_SeqSBAIJ *b; 1432 Mat B; 1433 PetscErrorCode ierr; 1434 PetscTruth perm_identity; 1435 PetscReal fill = info->fill; 1436 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1437 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1438 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1439 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1440 PetscBT lnkbt; 1441 IS iperm; 1442 1443 PetscFunctionBegin; 1444 /* check whether perm is the identity mapping */ 1445 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1446 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1447 1448 if (!perm_identity){ 1449 /* check if perm is symmetric! */ 1450 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1451 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1452 for (i=0; i<am; i++) { 1453 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1454 } 1455 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1456 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1457 } 1458 1459 /* initialization */ 1460 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1461 ui[0] = 0; 1462 1463 /* jl: linked list for storing indices of the pivot rows 1464 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1465 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1466 il = jl + am; 1467 cols = il + am; 1468 ui_ptr = (PetscInt**)(cols + am); 1469 for (i=0; i<am; i++){ 1470 jl[i] = am; il[i] = 0; 1471 } 1472 1473 /* create and initialize a linked list for storing column indices of the active row k */ 1474 nlnk = am + 1; 1475 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1476 1477 /* initial FreeSpace size is fill*(ai[am]+1) */ 1478 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1479 current_space = free_space; 1480 1481 for (k=0; k<am; k++){ /* for each active row k */ 1482 /* initialize lnk by the column indices of row rip[k] of A */ 1483 nzk = 0; 1484 ncols = ai[rip[k]+1] - ai[rip[k]]; 1485 ncols_upper = 0; 1486 for (j=0; j<ncols; j++){ 1487 i = rip[*(aj + ai[rip[k]] + j)]; 1488 if (i >= k){ /* only take upper triangular entry */ 1489 cols[ncols_upper] = i; 1490 ncols_upper++; 1491 } 1492 } 1493 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1494 nzk += nlnk; 1495 1496 /* update lnk by computing fill-in for each pivot row to be merged in */ 1497 prow = jl[k]; /* 1st pivot row */ 1498 1499 while (prow < k){ 1500 nextprow = jl[prow]; 1501 /* merge prow into k-th row */ 1502 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1503 jmax = ui[prow+1]; 1504 ncols = jmax-jmin; 1505 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1506 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1507 nzk += nlnk; 1508 1509 /* update il and jl for prow */ 1510 if (jmin < jmax){ 1511 il[prow] = jmin; 1512 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1513 } 1514 prow = nextprow; 1515 } 1516 1517 /* if free space is not available, make more free space */ 1518 if (current_space->local_remaining<nzk) { 1519 i = am - k + 1; /* num of unfactored rows */ 1520 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1521 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1522 reallocs++; 1523 } 1524 1525 /* copy data into free space, then initialize lnk */ 1526 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1527 1528 /* add the k-th row into il and jl */ 1529 if (nzk-1 > 0){ 1530 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1531 jl[k] = jl[i]; jl[i] = k; 1532 il[k] = ui[k] + 1; 1533 } 1534 ui_ptr[k] = current_space->array; 1535 current_space->array += nzk; 1536 current_space->local_used += nzk; 1537 current_space->local_remaining -= nzk; 1538 1539 ui[k+1] = ui[k] + nzk; 1540 } 1541 1542 #if defined(PETSC_USE_INFO) 1543 if (ai[am] != 0) { 1544 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1545 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1546 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1547 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1548 } else { 1549 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1550 } 1551 #endif 1552 1553 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1554 ierr = PetscFree(jl);CHKERRQ(ierr); 1555 1556 /* destroy list of free space and other temporary array(s) */ 1557 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1558 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1559 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1560 1561 /* put together the new matrix in MATSEQSBAIJ format */ 1562 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1563 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1564 B = *fact; 1565 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1566 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1567 1568 b = (Mat_SeqSBAIJ*)B->data; 1569 b->singlemalloc = PETSC_FALSE; 1570 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1571 b->j = uj; 1572 b->i = ui; 1573 b->diag = 0; 1574 b->ilen = 0; 1575 b->imax = 0; 1576 b->row = perm; 1577 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1578 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1579 b->icol = perm; 1580 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1581 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1582 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1583 b->maxnz = b->nz = ui[am]; 1584 1585 B->factor = FACTOR_CHOLESKY; 1586 B->info.factor_mallocs = reallocs; 1587 B->info.fill_ratio_given = fill; 1588 if (ai[am] != 0) { 1589 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1590 } else { 1591 B->info.fill_ratio_needed = 0.0; 1592 } 1593 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1594 if (perm_identity){ 1595 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1596 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600