1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 PetscInt maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 reallocs++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 381 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 382 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 383 PetscLogObjectParent(*B,isicol); 384 b = (Mat_SeqAIJ*)(*B)->data; 385 ierr = PetscFree(b->imax);CHKERRQ(ierr); 386 b->singlemalloc = PETSC_FALSE; 387 /* the next line frees the default space generated by the Create() */ 388 ierr = PetscFree(b->a);CHKERRQ(ierr); 389 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 390 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 391 b->j = ajnew; 392 b->i = ainew; 393 b->diag = idnew; 394 b->ilen = 0; 395 b->imax = 0; 396 b->row = isrow; 397 b->col = iscol; 398 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 399 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 400 b->icol = isicol; 401 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 402 /* In b structure: Free imax, ilen, old a, old j. 403 Allocate idnew, solve_work, new a, new j */ 404 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 405 b->maxnz = b->nz = ainew[n] ; 406 407 (*B)->factor = FACTOR_LU; 408 (*B)->info.factor_mallocs = reallocs; 409 (*B)->info.fill_ratio_given = f; 410 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 413 if (ai[n] != 0) { 414 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 415 } else { 416 (*B)->info.fill_ratio_needed = 0.0; 417 } 418 PetscFunctionReturn(0); 419 } 420 /* ----------------------------------------------------------- */ 421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 426 { 427 Mat C=*B; 428 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 429 IS isrow = b->row,isicol = b->icol; 430 PetscErrorCode ierr; 431 PetscInt *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j; 432 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 433 PetscInt *diag_offset = b->diag,diag,*pj,nshift=0; 434 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 435 PetscReal zeropivot,rs,d,shift_nonzero; 436 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.; 437 PetscTruth lushift,shift_pd; 438 439 PetscFunctionBegin; 440 shift_nonzero = info->shiftnz; 441 shift_pd = info->shiftpd; 442 zeropivot = info->zeropivot; 443 shift_fraction = info->shift_fraction; 444 445 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 446 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 447 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 448 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 449 rtmps = rtmp; ics = ic; 450 451 if (!a->diag) { 452 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 453 } 454 /* if both shift schemes are chosen by user, only use shift_pd */ 455 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 456 if (shift_pd) { /* set shift_top=max{row_shift} */ 457 PetscInt *aai = a->i,*ddiag = a->diag; 458 shift_top = 0; 459 for (i=0; i<n; i++) { 460 d = PetscAbsScalar((a->a)[ddiag[i]]); 461 /* calculate amt of shift needed for this row */ 462 if (d<=0) { 463 row_shift = 0; 464 } else { 465 row_shift = -2*d; 466 } 467 v = a->a+aai[i]; 468 nz = aai[i+1] - aai[i]; 469 for (j=0; j<nz; j++) 470 row_shift += PetscAbsScalar(v[j]); 471 if (row_shift>shift_top) shift_top = row_shift; 472 } 473 } 474 475 /* shift_fraction = b->lu_shift_fraction; */ 476 shift_amount = 0; 477 do { 478 lushift = PETSC_FALSE; 479 for (i=0; i<n; i++){ 480 nz = ai[i+1] - ai[i]; 481 ajtmp = aj + ai[i]; 482 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 483 484 /* load in initial (unfactored row) */ 485 nz = a->i[r[i]+1] - a->i[r[i]]; 486 ajtmpold = a->j + a->i[r[i]]; 487 v = a->a + a->i[r[i]]; 488 for (j=0; j<nz; j++) { 489 rtmp[ics[ajtmpold[j]]] = v[j]; 490 } 491 rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */ 492 493 row = *ajtmp++; 494 while (row < i) { 495 pc = rtmp + row; 496 if (*pc != 0.0) { 497 pv = b->a + diag_offset[row]; 498 pj = b->j + diag_offset[row] + 1; 499 multiplier = *pc / *pv++; 500 *pc = multiplier; 501 nz = ai[row+1] - diag_offset[row] - 1; 502 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 503 PetscLogFlops(2*nz); 504 } 505 row = *ajtmp++; 506 } 507 /* finished row so stick it into b->a */ 508 pv = b->a + ai[i] ; 509 pj = b->j + ai[i] ; 510 nz = ai[i+1] - ai[i]; 511 diag = diag_offset[i] - ai[i]; 512 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 513 rs = 0.0; 514 for (j=0; j<nz; j++) { 515 pv[j] = rtmps[pj[j]]; 516 if (j != diag) rs += PetscAbsScalar(pv[j]); 517 } 518 /* shift the diagonals when zero pivot is detected */ 519 #define MAX_NSHIFT 5 520 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){ 521 /* force |diag(*B)| > zeropivot*rs */ 522 if (!nshift){ 523 shift_amount = shift_nonzero; 524 } else { 525 shift_amount *= 2.0; 526 } 527 lushift = PETSC_TRUE; 528 nshift++; 529 break; 530 } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){ 531 /* force *B to be diagonally dominant */ 532 if (nshift>MAX_NSHIFT) { 533 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 534 } else if (nshift==MAX_NSHIFT) { 535 shift_fraction = shift_hi; 536 lushift = PETSC_FALSE; 537 } else { 538 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 539 lushift = PETSC_TRUE; 540 } 541 shift_amount = shift_fraction * shift_top; 542 nshift++; 543 break; 544 } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){ 545 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 546 } 547 } 548 if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) { 549 /* 550 * if no shift in this attempt & shifting & started shifting & can refine, 551 * then try lower shift 552 */ 553 shift_hi = shift_fraction; 554 shift_fraction = (shift_hi+shift_lo)/2.; 555 shift_amount = shift_fraction * shift_top; 556 lushift = PETSC_TRUE; 557 nshift++; 558 } 559 } while (lushift); 560 561 /* invert diagonal entries for simplier triangular solves */ 562 for (i=0; i<n; i++) { 563 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 564 } 565 566 ierr = PetscFree(rtmp);CHKERRQ(ierr); 567 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 568 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 569 C->factor = FACTOR_LU; 570 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 571 C->assembled = PETSC_TRUE; 572 PetscLogFlops(C->n); 573 if (nshift){ 574 if (shift_nonzero) { 575 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 576 } else if (shift_pd) { 577 b->lu_shift_fraction = shift_fraction; 578 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 579 } 580 } 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 586 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 587 { 588 PetscFunctionBegin; 589 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 590 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 591 PetscFunctionReturn(0); 592 } 593 594 595 /* ----------------------------------------------------------- */ 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatLUFactor_SeqAIJ" 598 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 599 { 600 PetscErrorCode ierr; 601 Mat C; 602 603 PetscFunctionBegin; 604 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 605 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 606 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 607 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 608 PetscFunctionReturn(0); 609 } 610 /* ----------------------------------------------------------- */ 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatSolve_SeqAIJ" 613 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 614 { 615 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 616 IS iscol = a->col,isrow = a->row; 617 PetscErrorCode ierr; 618 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 619 PetscInt nz,*rout,*cout; 620 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 621 622 PetscFunctionBegin; 623 if (!n) PetscFunctionReturn(0); 624 625 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 626 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 627 tmp = a->solve_work; 628 629 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 630 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 631 632 /* forward solve the lower triangular */ 633 tmp[0] = b[*r++]; 634 tmps = tmp; 635 for (i=1; i<n; i++) { 636 v = aa + ai[i] ; 637 vi = aj + ai[i] ; 638 nz = a->diag[i] - ai[i]; 639 sum = b[*r++]; 640 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 641 tmp[i] = sum; 642 } 643 644 /* backward solve the upper triangular */ 645 for (i=n-1; i>=0; i--){ 646 v = aa + a->diag[i] + 1; 647 vi = aj + a->diag[i] + 1; 648 nz = ai[i+1] - a->diag[i] - 1; 649 sum = tmp[i]; 650 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 651 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 652 } 653 654 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 655 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 656 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 PetscLogFlops(2*a->nz - A->n); 659 PetscFunctionReturn(0); 660 } 661 662 /* ----------------------------------------------------------- */ 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 665 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 666 { 667 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 668 PetscErrorCode ierr; 669 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 670 PetscScalar *x,*b,*aa = a->a; 671 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 672 PetscInt adiag_i,i,*vi,nz,ai_i; 673 PetscScalar *v,sum; 674 #endif 675 676 PetscFunctionBegin; 677 if (!n) PetscFunctionReturn(0); 678 679 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 680 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 681 682 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 683 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 684 #else 685 /* forward solve the lower triangular */ 686 x[0] = b[0]; 687 for (i=1; i<n; i++) { 688 ai_i = ai[i]; 689 v = aa + ai_i; 690 vi = aj + ai_i; 691 nz = adiag[i] - ai_i; 692 sum = b[i]; 693 while (nz--) sum -= *v++ * x[*vi++]; 694 x[i] = sum; 695 } 696 697 /* backward solve the upper triangular */ 698 for (i=n-1; i>=0; i--){ 699 adiag_i = adiag[i]; 700 v = aa + adiag_i + 1; 701 vi = aj + adiag_i + 1; 702 nz = ai[i+1] - adiag_i - 1; 703 sum = x[i]; 704 while (nz--) sum -= *v++ * x[*vi++]; 705 x[i] = sum*aa[adiag_i]; 706 } 707 #endif 708 PetscLogFlops(2*a->nz - A->n); 709 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 710 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 716 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 717 { 718 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 719 IS iscol = a->col,isrow = a->row; 720 PetscErrorCode ierr; 721 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 722 PetscInt nz,*rout,*cout; 723 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 724 725 PetscFunctionBegin; 726 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 727 728 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 729 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 730 tmp = a->solve_work; 731 732 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 733 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 734 735 /* forward solve the lower triangular */ 736 tmp[0] = b[*r++]; 737 for (i=1; i<n; i++) { 738 v = aa + ai[i] ; 739 vi = aj + ai[i] ; 740 nz = a->diag[i] - ai[i]; 741 sum = b[*r++]; 742 while (nz--) sum -= *v++ * tmp[*vi++ ]; 743 tmp[i] = sum; 744 } 745 746 /* backward solve the upper triangular */ 747 for (i=n-1; i>=0; i--){ 748 v = aa + a->diag[i] + 1; 749 vi = aj + a->diag[i] + 1; 750 nz = ai[i+1] - a->diag[i] - 1; 751 sum = tmp[i]; 752 while (nz--) sum -= *v++ * tmp[*vi++ ]; 753 tmp[i] = sum*aa[a->diag[i]]; 754 x[*c--] += tmp[i]; 755 } 756 757 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 758 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 759 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 760 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 761 PetscLogFlops(2*a->nz); 762 763 PetscFunctionReturn(0); 764 } 765 /* -------------------------------------------------------------------*/ 766 #undef __FUNCT__ 767 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 768 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 769 { 770 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 771 IS iscol = a->col,isrow = a->row; 772 PetscErrorCode ierr; 773 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 774 PetscInt nz,*rout,*cout,*diag = a->diag; 775 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 776 777 PetscFunctionBegin; 778 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 779 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 780 tmp = a->solve_work; 781 782 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 783 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 784 785 /* copy the b into temp work space according to permutation */ 786 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 787 788 /* forward solve the U^T */ 789 for (i=0; i<n; i++) { 790 v = aa + diag[i] ; 791 vi = aj + diag[i] + 1; 792 nz = ai[i+1] - diag[i] - 1; 793 s1 = tmp[i]; 794 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 795 while (nz--) { 796 tmp[*vi++ ] -= (*v++)*s1; 797 } 798 tmp[i] = s1; 799 } 800 801 /* backward solve the L^T */ 802 for (i=n-1; i>=0; i--){ 803 v = aa + diag[i] - 1 ; 804 vi = aj + diag[i] - 1 ; 805 nz = diag[i] - ai[i]; 806 s1 = tmp[i]; 807 while (nz--) { 808 tmp[*vi-- ] -= (*v--)*s1; 809 } 810 } 811 812 /* copy tmp into x according to permutation */ 813 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 814 815 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 816 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 817 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 818 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 819 820 PetscLogFlops(2*a->nz-A->n); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 826 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 827 { 828 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 829 IS iscol = a->col,isrow = a->row; 830 PetscErrorCode ierr; 831 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 832 PetscInt nz,*rout,*cout,*diag = a->diag; 833 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 834 835 PetscFunctionBegin; 836 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 837 838 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 839 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 840 tmp = a->solve_work; 841 842 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 843 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 844 845 /* copy the b into temp work space according to permutation */ 846 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 847 848 /* forward solve the U^T */ 849 for (i=0; i<n; i++) { 850 v = aa + diag[i] ; 851 vi = aj + diag[i] + 1; 852 nz = ai[i+1] - diag[i] - 1; 853 tmp[i] *= *v++; 854 while (nz--) { 855 tmp[*vi++ ] -= (*v++)*tmp[i]; 856 } 857 } 858 859 /* backward solve the L^T */ 860 for (i=n-1; i>=0; i--){ 861 v = aa + diag[i] - 1 ; 862 vi = aj + diag[i] - 1 ; 863 nz = diag[i] - ai[i]; 864 while (nz--) { 865 tmp[*vi-- ] -= (*v--)*tmp[i]; 866 } 867 } 868 869 /* copy tmp into x according to permutation */ 870 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 871 872 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 873 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 874 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 875 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 876 877 PetscLogFlops(2*a->nz); 878 PetscFunctionReturn(0); 879 } 880 /* ----------------------------------------------------------------*/ 881 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 885 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 886 { 887 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 888 IS isicol; 889 PetscErrorCode ierr; 890 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 891 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 892 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 893 PetscInt incrlev,nnz,i,levels,diagonal_fill; 894 PetscTruth col_identity,row_identity; 895 PetscReal f; 896 897 PetscFunctionBegin; 898 f = info->fill; 899 levels = (PetscInt)info->levels; 900 diagonal_fill = (PetscInt)info->diagonal_fill; 901 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 902 903 /* special case that simply copies fill pattern */ 904 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 905 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 906 if (!levels && row_identity && col_identity) { 907 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 908 (*fact)->factor = FACTOR_LU; 909 b = (Mat_SeqAIJ*)(*fact)->data; 910 if (!b->diag) { 911 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 912 } 913 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 914 b->row = isrow; 915 b->col = iscol; 916 b->icol = isicol; 917 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 918 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 919 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 920 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } 923 924 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 925 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 926 927 /* get new row pointers */ 928 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 929 ainew[0] = 0; 930 /* don't know how many column pointers are needed so estimate */ 931 jmax = (PetscInt)(f*(ai[n]+1)); 932 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 933 /* ajfill is level of fill for each fill entry */ 934 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 935 /* fill is a linked list of nonzeros in active row */ 936 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 937 /* im is level for each filled value */ 938 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 939 /* dloc is location of diagonal in factor */ 940 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 941 dloc[0] = 0; 942 for (prow=0; prow<n; prow++) { 943 944 /* copy current row into linked list */ 945 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 946 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 947 xi = aj + ai[r[prow]]; 948 fill[n] = n; 949 fill[prow] = -1; /* marker to indicate if diagonal exists */ 950 while (nz--) { 951 fm = n; 952 idx = ic[*xi++ ]; 953 do { 954 m = fm; 955 fm = fill[m]; 956 } while (fm < idx); 957 fill[m] = idx; 958 fill[idx] = fm; 959 im[idx] = 0; 960 } 961 962 /* make sure diagonal entry is included */ 963 if (diagonal_fill && fill[prow] == -1) { 964 fm = n; 965 while (fill[fm] < prow) fm = fill[fm]; 966 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 967 fill[fm] = prow; 968 im[prow] = 0; 969 nzf++; 970 dcount++; 971 } 972 973 nzi = 0; 974 row = fill[n]; 975 while (row < prow) { 976 incrlev = im[row] + 1; 977 nz = dloc[row]; 978 xi = ajnew + ainew[row] + nz + 1; 979 flev = ajfill + ainew[row] + nz + 1; 980 nnz = ainew[row+1] - ainew[row] - nz - 1; 981 fm = row; 982 while (nnz-- > 0) { 983 idx = *xi++ ; 984 if (*flev + incrlev > levels) { 985 flev++; 986 continue; 987 } 988 do { 989 m = fm; 990 fm = fill[m]; 991 } while (fm < idx); 992 if (fm != idx) { 993 im[idx] = *flev + incrlev; 994 fill[m] = idx; 995 fill[idx] = fm; 996 fm = idx; 997 nzf++; 998 } else { 999 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1000 } 1001 flev++; 1002 } 1003 row = fill[row]; 1004 nzi++; 1005 } 1006 /* copy new filled row into permanent storage */ 1007 ainew[prow+1] = ainew[prow] + nzf; 1008 if (ainew[prow+1] > jmax) { 1009 1010 /* estimate how much additional space we will need */ 1011 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1012 /* just double the memory each time */ 1013 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1014 PetscInt maxadd = jmax; 1015 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1016 jmax += maxadd; 1017 1018 /* allocate a longer ajnew and ajfill */ 1019 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1020 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1021 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1022 ajnew = xi; 1023 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1024 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1025 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1026 ajfill = xi; 1027 reallocs++; /* count how many times we realloc */ 1028 } 1029 xi = ajnew + ainew[prow] ; 1030 flev = ajfill + ainew[prow] ; 1031 dloc[prow] = nzi; 1032 fm = fill[n]; 1033 while (nzf--) { 1034 *xi++ = fm ; 1035 *flev++ = im[fm]; 1036 fm = fill[fm]; 1037 } 1038 /* make sure row has diagonal entry */ 1039 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1040 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1041 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1042 } 1043 } 1044 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1045 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1046 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1047 ierr = PetscFree(fill);CHKERRQ(ierr); 1048 ierr = PetscFree(im);CHKERRQ(ierr); 1049 1050 { 1051 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1052 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1053 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1054 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1055 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1056 if (diagonal_fill) { 1057 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1058 } 1059 } 1060 1061 /* put together the new matrix */ 1062 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1063 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1064 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1065 PetscLogObjectParent(*fact,isicol); 1066 b = (Mat_SeqAIJ*)(*fact)->data; 1067 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1068 b->singlemalloc = PETSC_FALSE; 1069 len = (ainew[n] )*sizeof(PetscScalar); 1070 /* the next line frees the default space generated by the Create() */ 1071 ierr = PetscFree(b->a);CHKERRQ(ierr); 1072 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1073 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1074 b->j = ajnew; 1075 b->i = ainew; 1076 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1077 b->diag = dloc; 1078 b->ilen = 0; 1079 b->imax = 0; 1080 b->row = isrow; 1081 b->col = iscol; 1082 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1084 b->icol = isicol; 1085 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1086 /* In b structure: Free imax, ilen, old a, old j. 1087 Allocate dloc, solve_work, new a, new j */ 1088 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1089 b->maxnz = b->nz = ainew[n] ; 1090 (*fact)->factor = FACTOR_LU; 1091 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1092 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1093 (*fact)->info.factor_mallocs = reallocs; 1094 (*fact)->info.fill_ratio_given = f; 1095 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #include "src/mat/impls/sbaij/seq/sbaij.h" 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1102 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1103 { 1104 Mat C = *B; 1105 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1106 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1107 IS ip=b->row; 1108 PetscErrorCode ierr; 1109 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1110 PetscInt *ai=a->i,*aj=a->j; 1111 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1112 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1113 PetscReal zeropivot,shift_amount,rs,shift_nonzero; 1114 PetscTruth chshift,shift_pd; 1115 PetscInt nshift=0; 1116 1117 PetscFunctionBegin; 1118 shift_nonzero = info->shiftnz; 1119 shift_pd = info->shiftpd; 1120 zeropivot = info->zeropivot; 1121 1122 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1123 1124 /* initialization */ 1125 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1126 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1127 jl = il + mbs; 1128 rtmp = (MatScalar*)(jl + mbs); 1129 1130 shift_amount = 0; 1131 do { 1132 chshift = PETSC_FALSE; 1133 for (i=0; i<mbs; i++) { 1134 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1135 } 1136 1137 for (k = 0; k<mbs; k++){ 1138 bval = ba + bi[k]; 1139 /* initialize k-th row by the perm[k]-th row of A */ 1140 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1141 for (j = jmin; j < jmax; j++){ 1142 col = rip[aj[j]]; 1143 if (col >= k){ /* only take upper triangular entry */ 1144 rtmp[col] = aa[j]; 1145 *bval++ = 0.0; /* for in-place factorization */ 1146 } 1147 } 1148 /* shift the diagonal of the matrix */ 1149 if (nshift) rtmp[k] += shift_amount; 1150 1151 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1152 dk = rtmp[k]; 1153 i = jl[k]; /* first row to be added to k_th row */ 1154 1155 while (i < k){ 1156 nexti = jl[i]; /* next row to be added to k_th row */ 1157 1158 /* compute multiplier, update diag(k) and U(i,k) */ 1159 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1160 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1161 dk += uikdi*ba[ili]; 1162 ba[ili] = uikdi; /* -U(i,k) */ 1163 1164 /* add multiple of row i to k-th row */ 1165 jmin = ili + 1; jmax = bi[i+1]; 1166 if (jmin < jmax){ 1167 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1168 /* update il and jl for row i */ 1169 il[i] = jmin; 1170 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1171 } 1172 i = nexti; 1173 } 1174 1175 /* shift the diagonals when zero pivot is detected */ 1176 /* compute rs=sum of abs(off-diagonal) */ 1177 rs = 0.0; 1178 jmin = bi[k]+1; 1179 nz = bi[k+1] - jmin; 1180 if (nz){ 1181 bcol = bj + jmin; 1182 while (nz--){ 1183 rs += PetscAbsScalar(rtmp[*bcol++]); 1184 } 1185 } 1186 if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){ 1187 /* force |diag(*B)| > zeropivot*rs */ 1188 if (!nshift){ 1189 shift_amount = shift_nonzero; 1190 } else { 1191 shift_amount *= 2.0; 1192 } 1193 chshift = PETSC_TRUE; 1194 nshift++; 1195 break; 1196 } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){ 1197 /* calculate a shift that would make this row diagonally dominant */ 1198 shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount); 1199 chshift = PETSC_TRUE; 1200 /* Unlike in the ILU case there is no exit condition on nshift: 1201 we increase the shift until it converges. There is no guarantee that 1202 this algorithm converges faster or slower, or is better or worse 1203 than the ILU algorithm. */ 1204 nshift++; 1205 break; 1206 } else if (PetscAbsScalar(dk) <= zeropivot*rs){ 1207 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1208 } 1209 1210 /* copy data into U(k,:) */ 1211 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1212 jmin = bi[k]+1; jmax = bi[k+1]; 1213 if (jmin < jmax) { 1214 for (j=jmin; j<jmax; j++){ 1215 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1216 } 1217 /* add the k-th row into il and jl */ 1218 il[k] = jmin; 1219 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1220 } 1221 } 1222 } while (chshift); 1223 ierr = PetscFree(il);CHKERRQ(ierr); 1224 1225 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1226 C->factor = FACTOR_CHOLESKY; 1227 C->assembled = PETSC_TRUE; 1228 C->preallocated = PETSC_TRUE; 1229 PetscLogFlops(C->m); 1230 if (nshift){ 1231 if (shift_nonzero) { 1232 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 1233 } else if (shift_pd) { 1234 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount); 1235 } 1236 } 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #include "petscbt.h" 1241 #include "src/mat/utils/freespace.h" 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1244 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1245 { 1246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1247 Mat_SeqSBAIJ *b; 1248 Mat B; 1249 PetscErrorCode ierr; 1250 PetscTruth perm_identity; 1251 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1252 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1253 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1254 PetscReal fill=info->fill,levels=info->levels; 1255 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1256 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1257 PetscBT lnkbt; 1258 1259 PetscFunctionBegin; 1260 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1261 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1262 1263 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1264 ui[0] = 0; 1265 1266 /* special case that simply copies fill pattern */ 1267 if (!levels && perm_identity) { 1268 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1269 for (i=0; i<am; i++) { 1270 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1271 } 1272 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1273 cols = uj; 1274 for (i=0; i<am; i++) { 1275 aj = a->j + a->diag[i]; 1276 ncols = ui[i+1] - ui[i]; 1277 for (j=0; j<ncols; j++) *cols++ = *aj++; 1278 } 1279 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1280 /* initialization */ 1281 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1282 1283 /* jl: linked list for storing indices of the pivot rows 1284 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1285 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1286 il = jl + am; 1287 uj_ptr = (PetscInt**)(il + am); 1288 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1289 for (i=0; i<am; i++){ 1290 jl[i] = am; il[i] = 0; 1291 } 1292 1293 /* create and initialize a linked list for storing column indices of the active row k */ 1294 nlnk = am + 1; 1295 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1296 1297 /* initial FreeSpace size is fill*(ai[am]+1) */ 1298 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1299 current_space = free_space; 1300 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1301 current_space_lvl = free_space_lvl; 1302 1303 for (k=0; k<am; k++){ /* for each active row k */ 1304 /* initialize lnk by the column indices of row rip[k] of A */ 1305 nzk = 0; 1306 ncols = ai[rip[k]+1] - ai[rip[k]]; 1307 ncols_upper = 0; 1308 cols = cols_lvl + am; 1309 for (j=0; j<ncols; j++){ 1310 i = rip[*(aj + ai[rip[k]] + j)]; 1311 if (i >= k){ /* only take upper triangular entry */ 1312 cols[ncols_upper] = i; 1313 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1314 ncols_upper++; 1315 } 1316 } 1317 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1318 nzk += nlnk; 1319 1320 /* update lnk by computing fill-in for each pivot row to be merged in */ 1321 prow = jl[k]; /* 1st pivot row */ 1322 1323 while (prow < k){ 1324 nextprow = jl[prow]; 1325 1326 /* merge prow into k-th row */ 1327 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1328 jmax = ui[prow+1]; 1329 ncols = jmax-jmin; 1330 i = jmin - ui[prow]; 1331 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1332 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1333 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1334 nzk += nlnk; 1335 1336 /* update il and jl for prow */ 1337 if (jmin < jmax){ 1338 il[prow] = jmin; 1339 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1340 } 1341 prow = nextprow; 1342 } 1343 1344 /* if free space is not available, make more free space */ 1345 if (current_space->local_remaining<nzk) { 1346 i = am - k + 1; /* num of unfactored rows */ 1347 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1348 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1349 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1350 reallocs++; 1351 } 1352 1353 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1354 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1355 1356 /* add the k-th row into il and jl */ 1357 if (nzk > 1){ 1358 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1359 jl[k] = jl[i]; jl[i] = k; 1360 il[k] = ui[k] + 1; 1361 } 1362 uj_ptr[k] = current_space->array; 1363 uj_lvl_ptr[k] = current_space_lvl->array; 1364 1365 current_space->array += nzk; 1366 current_space->local_used += nzk; 1367 current_space->local_remaining -= nzk; 1368 1369 current_space_lvl->array += nzk; 1370 current_space_lvl->local_used += nzk; 1371 current_space_lvl->local_remaining -= nzk; 1372 1373 ui[k+1] = ui[k] + nzk; 1374 } 1375 1376 if (ai[am] != 0) { 1377 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1378 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1379 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1380 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1381 } else { 1382 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1383 } 1384 1385 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1386 ierr = PetscFree(jl);CHKERRQ(ierr); 1387 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1388 1389 /* destroy list of free space and other temporary array(s) */ 1390 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1391 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1392 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1393 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1394 1395 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1396 1397 /* put together the new matrix in MATSEQSBAIJ format */ 1398 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1399 B = *fact; 1400 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1401 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1402 1403 b = (Mat_SeqSBAIJ*)B->data; 1404 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1405 b->singlemalloc = PETSC_FALSE; 1406 /* the next line frees the default space generated by the Create() */ 1407 ierr = PetscFree(b->a);CHKERRQ(ierr); 1408 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1409 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1410 b->j = uj; 1411 b->i = ui; 1412 b->diag = 0; 1413 b->ilen = 0; 1414 b->imax = 0; 1415 b->row = perm; 1416 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1417 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1418 b->icol = perm; 1419 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1420 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1421 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1422 b->maxnz = b->nz = ui[am]; 1423 1424 B->factor = FACTOR_CHOLESKY; 1425 B->info.factor_mallocs = reallocs; 1426 B->info.fill_ratio_given = fill; 1427 if (ai[am] != 0) { 1428 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1429 } else { 1430 B->info.fill_ratio_needed = 0.0; 1431 } 1432 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1433 if (perm_identity){ 1434 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1435 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1436 } 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1442 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1443 { 1444 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1445 Mat_SeqSBAIJ *b; 1446 Mat B; 1447 PetscErrorCode ierr; 1448 PetscTruth perm_identity; 1449 PetscReal fill = info->fill; 1450 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1451 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1452 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1453 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1454 PetscBT lnkbt; 1455 IS iperm; 1456 1457 PetscFunctionBegin; 1458 /* check whether perm is the identity mapping */ 1459 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1460 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1461 1462 if (!perm_identity){ 1463 /* check if perm is symmetric! */ 1464 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1465 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1466 for (i=0; i<mbs; i++) { 1467 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1468 } 1469 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1470 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1471 } 1472 1473 /* initialization */ 1474 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1475 ui[0] = 0; 1476 1477 /* jl: linked list for storing indices of the pivot rows 1478 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1479 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1480 il = jl + mbs; 1481 cols = il + mbs; 1482 ui_ptr = (PetscInt**)(cols + mbs); 1483 for (i=0; i<mbs; i++){ 1484 jl[i] = mbs; il[i] = 0; 1485 } 1486 1487 /* create and initialize a linked list for storing column indices of the active row k */ 1488 nlnk = mbs + 1; 1489 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1490 1491 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1492 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1493 current_space = free_space; 1494 1495 for (k=0; k<mbs; k++){ /* for each active row k */ 1496 /* initialize lnk by the column indices of row rip[k] of A */ 1497 nzk = 0; 1498 ncols = ai[rip[k]+1] - ai[rip[k]]; 1499 ncols_upper = 0; 1500 for (j=0; j<ncols; j++){ 1501 i = rip[*(aj + ai[rip[k]] + j)]; 1502 if (i >= k){ /* only take upper triangular entry */ 1503 cols[ncols_upper] = i; 1504 ncols_upper++; 1505 } 1506 } 1507 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1508 nzk += nlnk; 1509 1510 /* update lnk by computing fill-in for each pivot row to be merged in */ 1511 prow = jl[k]; /* 1st pivot row */ 1512 1513 while (prow < k){ 1514 nextprow = jl[prow]; 1515 /* merge prow into k-th row */ 1516 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1517 jmax = ui[prow+1]; 1518 ncols = jmax-jmin; 1519 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1520 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1521 nzk += nlnk; 1522 1523 /* update il and jl for prow */ 1524 if (jmin < jmax){ 1525 il[prow] = jmin; 1526 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1527 } 1528 prow = nextprow; 1529 } 1530 1531 /* if free space is not available, make more free space */ 1532 if (current_space->local_remaining<nzk) { 1533 i = mbs - k + 1; /* num of unfactored rows */ 1534 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1535 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1536 reallocs++; 1537 } 1538 1539 /* copy data into free space, then initialize lnk */ 1540 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1541 1542 /* add the k-th row into il and jl */ 1543 if (nzk-1 > 0){ 1544 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1545 jl[k] = jl[i]; jl[i] = k; 1546 il[k] = ui[k] + 1; 1547 } 1548 ui_ptr[k] = current_space->array; 1549 current_space->array += nzk; 1550 current_space->local_used += nzk; 1551 current_space->local_remaining -= nzk; 1552 1553 ui[k+1] = ui[k] + nzk; 1554 } 1555 1556 if (ai[mbs] != 0) { 1557 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1558 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1559 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1560 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1561 } else { 1562 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1563 } 1564 1565 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1566 ierr = PetscFree(jl);CHKERRQ(ierr); 1567 1568 /* destroy list of free space and other temporary array(s) */ 1569 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1570 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1571 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1572 1573 /* put together the new matrix in MATSEQSBAIJ format */ 1574 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1575 B = *fact; 1576 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1577 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1578 1579 b = (Mat_SeqSBAIJ*)B->data; 1580 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1581 b->singlemalloc = PETSC_FALSE; 1582 /* the next line frees the default space generated by the Create() */ 1583 ierr = PetscFree(b->a);CHKERRQ(ierr); 1584 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1585 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1586 b->j = uj; 1587 b->i = ui; 1588 b->diag = 0; 1589 b->ilen = 0; 1590 b->imax = 0; 1591 b->row = perm; 1592 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1593 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1594 b->icol = perm; 1595 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1596 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1597 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1598 b->maxnz = b->nz = ui[mbs]; 1599 1600 B->factor = FACTOR_CHOLESKY; 1601 B->info.factor_mallocs = reallocs; 1602 B->info.fill_ratio_given = fill; 1603 if (ai[mbs] != 0) { 1604 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1605 } else { 1606 B->info.fill_ratio_needed = 0.0; 1607 } 1608 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1609 if (perm_identity){ 1610 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1611 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1612 } 1613 PetscFunctionReturn(0); 1614 } 1615