1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 ierr = PetscFree(b->imax);CHKERRQ(ierr); 230 b->sorted = PETSC_FALSE; 231 b->singlemalloc = PETSC_FALSE; 232 /* the next line frees the default space generated by the MatCreate() */ 233 ierr = PetscFree(b->a);CHKERRQ(ierr); 234 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 /* check out for identical nodes. If found, use inode functions */ 251 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 252 253 af = ((double)b->nz)/((double)a->nz) + .001; 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 256 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 257 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 265 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 266 { 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 268 IS isicol; 269 PetscErrorCode ierr; 270 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 271 PetscInt *bi,*bj,*ajtmp; 272 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 273 PetscReal f; 274 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 275 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 276 PetscBT lnkbt; 277 278 PetscFunctionBegin; 279 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 280 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 281 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 282 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 283 284 /* get new row pointers */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 286 bi[0] = 0; 287 288 /* bdiag is location of diagonal in factor */ 289 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 290 bdiag[0] = 0; 291 292 /* linked list for storing column indices of the active row */ 293 nlnk = n + 1; 294 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 295 296 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 297 im = cols + n; 298 bi_ptr = (PetscInt**)(im + n); 299 300 /* initial FreeSpace size is f*(ai[n]+1) */ 301 f = info->fill; 302 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 303 current_space = free_space; 304 305 for (i=0; i<n; i++) { 306 /* copy previous fill into linked list */ 307 nzi = 0; 308 nnz = ai[r[i]+1] - ai[r[i]]; 309 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 310 ajtmp = aj + ai[r[i]]; 311 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 312 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 313 nzi += nlnk; 314 315 /* add pivot rows into linked list */ 316 row = lnk[n]; 317 while (row < i) { 318 nzbd = bdiag[row] - bi[row] + 1; 319 ajtmp = bi_ptr[row] + nzbd; 320 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 321 im[row] = nzbd; 322 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 323 nzi += nlnk; 324 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 325 326 row = lnk[row]; 327 } 328 329 bi[i+1] = bi[i] + nzi; 330 im[i] = nzi; 331 332 /* mark bdiag */ 333 nzbd = 0; 334 nnz = nzi; 335 k = lnk[n]; 336 while (nnz-- && k < i){ 337 nzbd++; 338 k = lnk[k]; 339 } 340 bdiag[i] = bi[i] + nzbd; 341 342 /* if free space is not available, make more free space */ 343 if (current_space->local_remaining<nzi) { 344 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 345 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 346 reallocs++; 347 } 348 349 /* copy data into free space, then initialize lnk */ 350 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 351 bi_ptr[i] = current_space->array; 352 current_space->array += nzi; 353 current_space->local_used += nzi; 354 current_space->local_remaining -= nzi; 355 } 356 if (ai[n] != 0) { 357 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 358 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 360 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 361 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 362 } else { 363 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 364 } 365 366 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 367 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 368 369 /* destroy list of free space and other temporary array(s) */ 370 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 371 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 373 ierr = PetscFree(cols);CHKERRQ(ierr); 374 375 /* put together the new matrix */ 376 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 377 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 378 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 379 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 380 b = (Mat_SeqAIJ*)(*B)->data; 381 ierr = PetscFree(b->imax);CHKERRQ(ierr); 382 b->singlemalloc = PETSC_FALSE; 383 /* the next line frees the default space generated by the Create() */ 384 ierr = PetscFree(b->a);CHKERRQ(ierr); 385 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 386 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 387 b->j = bj; 388 b->i = bi; 389 b->diag = bdiag; 390 b->ilen = 0; 391 b->imax = 0; 392 b->row = isrow; 393 b->col = iscol; 394 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 395 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 396 b->icol = isicol; 397 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 398 399 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 400 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 401 b->maxnz = b->nz = bi[n] ; 402 403 (*B)->factor = FACTOR_LU; 404 (*B)->info.factor_mallocs = reallocs; 405 (*B)->info.fill_ratio_given = f; 406 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 407 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 408 409 if (ai[n] != 0) { 410 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 411 } else { 412 (*B)->info.fill_ratio_needed = 0.0; 413 } 414 PetscFunctionReturn(0); 415 } 416 417 /* ----------------------------------------------------------- */ 418 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 422 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 423 { 424 Mat C=*B; 425 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 426 IS isrow = b->row,isicol = b->icol; 427 PetscErrorCode ierr; 428 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 429 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 430 PetscInt *diag_offset = b->diag,diag,*pj; 431 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 432 PetscReal zeropivot,rs,d,shift_nz; 433 PetscReal row_shift,shift_top=0.; 434 PetscTruth shift_pd; 435 LUShift_Ctx sctx; 436 PetscInt newshift; 437 438 PetscFunctionBegin; 439 shift_nz = info->shiftnz; 440 shift_pd = info->shiftpd; 441 zeropivot = info->zeropivot; 442 443 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 444 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 445 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 446 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 447 rtmps = rtmp; ics = ic; 448 449 if (!a->diag) { 450 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 451 } 452 /* if both shift schemes are chosen by user, only use shift_pd */ 453 if (shift_pd && shift_nz) shift_nz = 0.0; 454 if (shift_pd) { /* set shift_top=max{row_shift} */ 455 PetscInt *aai = a->i,*ddiag = a->diag; 456 shift_top = 0; 457 for (i=0; i<n; i++) { 458 d = PetscAbsScalar((a->a)[ddiag[i]]); 459 /* calculate amt of shift needed for this row */ 460 if (d<=0) { 461 row_shift = 0; 462 } else { 463 row_shift = -2*d; 464 } 465 v = a->a+aai[i]; 466 nz = aai[i+1] - aai[i]; 467 for (j=0; j<nz; j++) 468 row_shift += PetscAbsScalar(v[j]); 469 if (row_shift>shift_top) shift_top = row_shift; 470 } 471 } 472 473 sctx.shift_amount = 0; 474 sctx.shift_top = shift_top; 475 sctx.nshift = 0; 476 sctx.nshift_max = 5; 477 sctx.shift_lo = 0.; 478 sctx.shift_hi = 1.; 479 do { 480 sctx.lushift = PETSC_FALSE; 481 for (i=0; i<n; i++){ 482 nz = bi[i+1] - bi[i]; 483 bjtmp = bj + bi[i]; 484 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 485 486 /* load in initial (unfactored row) */ 487 nz = a->i[r[i]+1] - a->i[r[i]]; 488 ajtmp = a->j + a->i[r[i]]; 489 v = a->a + a->i[r[i]]; 490 for (j=0; j<nz; j++) { 491 rtmp[ics[ajtmp[j]]] = v[j]; 492 } 493 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 494 495 row = *bjtmp++; 496 while (row < i) { 497 pc = rtmp + row; 498 if (*pc != 0.0) { 499 pv = b->a + diag_offset[row]; 500 pj = b->j + diag_offset[row] + 1; 501 multiplier = *pc / *pv++; 502 *pc = multiplier; 503 nz = bi[row+1] - diag_offset[row] - 1; 504 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 505 PetscLogFlops(2*nz); 506 } 507 row = *bjtmp++; 508 } 509 /* finished row so stick it into b->a */ 510 pv = b->a + bi[i] ; 511 pj = b->j + bi[i] ; 512 nz = bi[i+1] - bi[i]; 513 diag = diag_offset[i] - bi[i]; 514 rs = 0.0; 515 for (j=0; j<nz; j++) { 516 pv[j] = rtmps[pj[j]]; 517 if (j != diag) rs += PetscAbsScalar(pv[j]); 518 } 519 520 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 521 sctx.rs = rs; 522 sctx.pv = pv[diag]; 523 ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 524 if (newshift == 1){ 525 break; /* sctx.shift_amount is updated */ 526 } else if (newshift == -1){ 527 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 528 } 529 } 530 531 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 532 /* 533 * if no shift in this attempt & shifting & started shifting & can refine, 534 * then try lower shift 535 */ 536 sctx.shift_hi = info->shift_fraction; 537 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 538 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 539 sctx.lushift = PETSC_TRUE; 540 sctx.nshift++; 541 } 542 } while (sctx.lushift); 543 544 /* invert diagonal entries for simplier triangular solves */ 545 for (i=0; i<n; i++) { 546 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 547 } 548 549 ierr = PetscFree(rtmp);CHKERRQ(ierr); 550 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 552 C->factor = FACTOR_LU; 553 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 554 C->assembled = PETSC_TRUE; 555 PetscLogFlops(C->n); 556 if (sctx.nshift){ 557 if (shift_nz) { 558 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 559 } else if (shift_pd) { 560 b->lu_shift_fraction = info->shift_fraction; 561 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 569 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 570 { 571 PetscFunctionBegin; 572 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 573 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 574 PetscFunctionReturn(0); 575 } 576 577 578 /* ----------------------------------------------------------- */ 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatLUFactor_SeqAIJ" 581 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 582 { 583 PetscErrorCode ierr; 584 Mat C; 585 586 PetscFunctionBegin; 587 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 588 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 589 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 590 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 /* ----------------------------------------------------------- */ 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatSolve_SeqAIJ" 596 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 597 { 598 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 599 IS iscol = a->col,isrow = a->row; 600 PetscErrorCode ierr; 601 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 602 PetscInt nz,*rout,*cout; 603 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 604 605 PetscFunctionBegin; 606 if (!n) PetscFunctionReturn(0); 607 608 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 609 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 610 tmp = a->solve_work; 611 612 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 613 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 614 615 /* forward solve the lower triangular */ 616 tmp[0] = b[*r++]; 617 tmps = tmp; 618 for (i=1; i<n; i++) { 619 v = aa + ai[i] ; 620 vi = aj + ai[i] ; 621 nz = a->diag[i] - ai[i]; 622 sum = b[*r++]; 623 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 624 tmp[i] = sum; 625 } 626 627 /* backward solve the upper triangular */ 628 for (i=n-1; i>=0; i--){ 629 v = aa + a->diag[i] + 1; 630 vi = aj + a->diag[i] + 1; 631 nz = ai[i+1] - a->diag[i] - 1; 632 sum = tmp[i]; 633 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 634 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 635 } 636 637 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 638 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 639 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 640 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 641 PetscLogFlops(2*a->nz - A->n); 642 PetscFunctionReturn(0); 643 } 644 645 /* ----------------------------------------------------------- */ 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 648 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 649 { 650 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 651 PetscErrorCode ierr; 652 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 653 PetscScalar *x,*b,*aa = a->a; 654 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 655 PetscInt adiag_i,i,*vi,nz,ai_i; 656 PetscScalar *v,sum; 657 #endif 658 659 PetscFunctionBegin; 660 if (!n) PetscFunctionReturn(0); 661 662 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 664 665 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 666 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 667 #else 668 /* forward solve the lower triangular */ 669 x[0] = b[0]; 670 for (i=1; i<n; i++) { 671 ai_i = ai[i]; 672 v = aa + ai_i; 673 vi = aj + ai_i; 674 nz = adiag[i] - ai_i; 675 sum = b[i]; 676 while (nz--) sum -= *v++ * x[*vi++]; 677 x[i] = sum; 678 } 679 680 /* backward solve the upper triangular */ 681 for (i=n-1; i>=0; i--){ 682 adiag_i = adiag[i]; 683 v = aa + adiag_i + 1; 684 vi = aj + adiag_i + 1; 685 nz = ai[i+1] - adiag_i - 1; 686 sum = x[i]; 687 while (nz--) sum -= *v++ * x[*vi++]; 688 x[i] = sum*aa[adiag_i]; 689 } 690 #endif 691 PetscLogFlops(2*a->nz - A->n); 692 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 699 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 700 { 701 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 702 IS iscol = a->col,isrow = a->row; 703 PetscErrorCode ierr; 704 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 705 PetscInt nz,*rout,*cout; 706 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 707 708 PetscFunctionBegin; 709 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 710 711 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 712 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 713 tmp = a->solve_work; 714 715 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 716 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 717 718 /* forward solve the lower triangular */ 719 tmp[0] = b[*r++]; 720 for (i=1; i<n; i++) { 721 v = aa + ai[i] ; 722 vi = aj + ai[i] ; 723 nz = a->diag[i] - ai[i]; 724 sum = b[*r++]; 725 while (nz--) sum -= *v++ * tmp[*vi++ ]; 726 tmp[i] = sum; 727 } 728 729 /* backward solve the upper triangular */ 730 for (i=n-1; i>=0; i--){ 731 v = aa + a->diag[i] + 1; 732 vi = aj + a->diag[i] + 1; 733 nz = ai[i+1] - a->diag[i] - 1; 734 sum = tmp[i]; 735 while (nz--) sum -= *v++ * tmp[*vi++ ]; 736 tmp[i] = sum*aa[a->diag[i]]; 737 x[*c--] += tmp[i]; 738 } 739 740 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 741 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 742 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 743 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 744 PetscLogFlops(2*a->nz); 745 746 PetscFunctionReturn(0); 747 } 748 /* -------------------------------------------------------------------*/ 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 751 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 752 { 753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 754 IS iscol = a->col,isrow = a->row; 755 PetscErrorCode ierr; 756 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 757 PetscInt nz,*rout,*cout,*diag = a->diag; 758 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 759 760 PetscFunctionBegin; 761 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 762 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 763 tmp = a->solve_work; 764 765 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 766 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 767 768 /* copy the b into temp work space according to permutation */ 769 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 770 771 /* forward solve the U^T */ 772 for (i=0; i<n; i++) { 773 v = aa + diag[i] ; 774 vi = aj + diag[i] + 1; 775 nz = ai[i+1] - diag[i] - 1; 776 s1 = tmp[i]; 777 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 778 while (nz--) { 779 tmp[*vi++ ] -= (*v++)*s1; 780 } 781 tmp[i] = s1; 782 } 783 784 /* backward solve the L^T */ 785 for (i=n-1; i>=0; i--){ 786 v = aa + diag[i] - 1 ; 787 vi = aj + diag[i] - 1 ; 788 nz = diag[i] - ai[i]; 789 s1 = tmp[i]; 790 while (nz--) { 791 tmp[*vi-- ] -= (*v--)*s1; 792 } 793 } 794 795 /* copy tmp into x according to permutation */ 796 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 797 798 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 799 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 801 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 802 803 PetscLogFlops(2*a->nz-A->n); 804 PetscFunctionReturn(0); 805 } 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 809 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 810 { 811 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 812 IS iscol = a->col,isrow = a->row; 813 PetscErrorCode ierr; 814 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 815 PetscInt nz,*rout,*cout,*diag = a->diag; 816 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 817 818 PetscFunctionBegin; 819 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 820 821 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 822 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 823 tmp = a->solve_work; 824 825 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 826 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 827 828 /* copy the b into temp work space according to permutation */ 829 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 830 831 /* forward solve the U^T */ 832 for (i=0; i<n; i++) { 833 v = aa + diag[i] ; 834 vi = aj + diag[i] + 1; 835 nz = ai[i+1] - diag[i] - 1; 836 tmp[i] *= *v++; 837 while (nz--) { 838 tmp[*vi++ ] -= (*v++)*tmp[i]; 839 } 840 } 841 842 /* backward solve the L^T */ 843 for (i=n-1; i>=0; i--){ 844 v = aa + diag[i] - 1 ; 845 vi = aj + diag[i] - 1 ; 846 nz = diag[i] - ai[i]; 847 while (nz--) { 848 tmp[*vi-- ] -= (*v--)*tmp[i]; 849 } 850 } 851 852 /* copy tmp into x according to permutation */ 853 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 854 855 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 856 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 857 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 858 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 859 860 PetscLogFlops(2*a->nz); 861 PetscFunctionReturn(0); 862 } 863 /* ----------------------------------------------------------------*/ 864 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 868 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 869 { 870 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 871 IS isicol; 872 PetscErrorCode ierr; 873 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 874 PetscInt *bi,*cols,nnz,*cols_lvl; 875 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 876 PetscInt i,levels,diagonal_fill; 877 PetscTruth col_identity,row_identity; 878 PetscReal f; 879 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 880 PetscBT lnkbt; 881 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 882 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 883 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 884 885 PetscFunctionBegin; 886 f = info->fill; 887 levels = (PetscInt)info->levels; 888 diagonal_fill = (PetscInt)info->diagonal_fill; 889 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 890 891 /* special case that simply copies fill pattern */ 892 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 893 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 894 if (!levels && row_identity && col_identity) { 895 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 896 (*fact)->factor = FACTOR_LU; 897 b = (Mat_SeqAIJ*)(*fact)->data; 898 if (!b->diag) { 899 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 900 } 901 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 902 b->row = isrow; 903 b->col = iscol; 904 b->icol = isicol; 905 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 906 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 907 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 908 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 913 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 914 915 /* get new row pointers */ 916 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 917 bi[0] = 0; 918 /* bdiag is location of diagonal in factor */ 919 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 920 bdiag[0] = 0; 921 922 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 923 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 924 925 /* create a linked list for storing column indices of the active row */ 926 nlnk = n + 1; 927 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 928 929 /* initial FreeSpace size is f*(ai[n]+1) */ 930 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 931 current_space = free_space; 932 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 933 current_space_lvl = free_space_lvl; 934 935 for (i=0; i<n; i++) { 936 nzi = 0; 937 /* copy current row into linked list */ 938 nnz = ai[r[i]+1] - ai[r[i]]; 939 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 940 cols = aj + ai[r[i]]; 941 lnk[i] = -1; /* marker to indicate if diagonal exists */ 942 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 943 nzi += nlnk; 944 945 /* make sure diagonal entry is included */ 946 if (diagonal_fill && lnk[i] == -1) { 947 fm = n; 948 while (lnk[fm] < i) fm = lnk[fm]; 949 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 950 lnk[fm] = i; 951 lnk_lvl[i] = 0; 952 nzi++; dcount++; 953 } 954 955 /* add pivot rows into the active row */ 956 nzbd = 0; 957 prow = lnk[n]; 958 while (prow < i) { 959 nnz = bdiag[prow]; 960 cols = bj_ptr[prow] + nnz + 1; 961 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 962 nnz = bi[prow+1] - bi[prow] - nnz - 1; 963 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 964 nzi += nlnk; 965 prow = lnk[prow]; 966 nzbd++; 967 } 968 bdiag[i] = nzbd; 969 bi[i+1] = bi[i] + nzi; 970 971 /* if free space is not available, make more free space */ 972 if (current_space->local_remaining<nzi) { 973 nnz = nzi*(n - i); /* estimated and max additional space needed */ 974 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 975 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 976 reallocs++; 977 } 978 979 /* copy data into free_space and free_space_lvl, then initialize lnk */ 980 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 981 bj_ptr[i] = current_space->array; 982 bjlvl_ptr[i] = current_space_lvl->array; 983 984 /* make sure the active row i has diagonal entry */ 985 if (*(bj_ptr[i]+bdiag[i]) != i) { 986 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 987 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 988 } 989 990 current_space->array += nzi; 991 current_space->local_used += nzi; 992 current_space->local_remaining -= nzi; 993 current_space_lvl->array += nzi; 994 current_space_lvl->local_used += nzi; 995 current_space_lvl->local_remaining -= nzi; 996 } 997 998 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 999 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1000 1001 /* destroy list of free space and other temporary arrays */ 1002 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1003 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1004 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1005 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1006 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1007 1008 { 1009 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1010 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1011 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1012 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1013 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1014 if (diagonal_fill) { 1015 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1016 } 1017 } 1018 1019 /* put together the new matrix */ 1020 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1021 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1022 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1023 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1024 b = (Mat_SeqAIJ*)(*fact)->data; 1025 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1026 b->singlemalloc = PETSC_FALSE; 1027 len = (bi[n] )*sizeof(PetscScalar); 1028 /* the next line frees the default space generated by the Create() */ 1029 ierr = PetscFree(b->a);CHKERRQ(ierr); 1030 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1031 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1032 b->j = bj; 1033 b->i = bi; 1034 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1035 b->diag = bdiag; 1036 b->ilen = 0; 1037 b->imax = 0; 1038 b->row = isrow; 1039 b->col = iscol; 1040 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1041 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1042 b->icol = isicol; 1043 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1044 /* In b structure: Free imax, ilen, old a, old j. 1045 Allocate bdiag, solve_work, new a, new j */ 1046 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1047 b->maxnz = b->nz = bi[n] ; 1048 (*fact)->factor = FACTOR_LU; 1049 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1050 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1051 (*fact)->info.factor_mallocs = reallocs; 1052 (*fact)->info.fill_ratio_given = f; 1053 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #include "src/mat/impls/sbaij/seq/sbaij.h" 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1060 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1061 { 1062 Mat C = *B; 1063 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1064 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1065 IS ip=b->row; 1066 PetscErrorCode ierr; 1067 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1068 PetscInt *ai=a->i,*aj=a->j; 1069 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1070 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1071 PetscReal zeropivot,rs,shiftnz; 1072 PetscTruth shiftpd; 1073 ChShift_Ctx sctx; 1074 PetscInt newshift; 1075 1076 PetscFunctionBegin; 1077 shiftnz = info->shiftnz; 1078 shiftpd = info->shiftpd; 1079 zeropivot = info->zeropivot; 1080 1081 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1082 1083 /* initialization */ 1084 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1085 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1086 jl = il + mbs; 1087 rtmp = (MatScalar*)(jl + mbs); 1088 1089 sctx.shift_amount = 0; 1090 sctx.nshift = 0; 1091 do { 1092 sctx.chshift = PETSC_FALSE; 1093 for (i=0; i<mbs; i++) { 1094 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1095 } 1096 1097 for (k = 0; k<mbs; k++){ 1098 bval = ba + bi[k]; 1099 /* initialize k-th row by the perm[k]-th row of A */ 1100 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1101 for (j = jmin; j < jmax; j++){ 1102 col = rip[aj[j]]; 1103 if (col >= k){ /* only take upper triangular entry */ 1104 rtmp[col] = aa[j]; 1105 *bval++ = 0.0; /* for in-place factorization */ 1106 } 1107 } 1108 /* shift the diagonal of the matrix */ 1109 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1110 1111 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1112 dk = rtmp[k]; 1113 i = jl[k]; /* first row to be added to k_th row */ 1114 1115 while (i < k){ 1116 nexti = jl[i]; /* next row to be added to k_th row */ 1117 1118 /* compute multiplier, update diag(k) and U(i,k) */ 1119 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1120 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1121 dk += uikdi*ba[ili]; 1122 ba[ili] = uikdi; /* -U(i,k) */ 1123 1124 /* add multiple of row i to k-th row */ 1125 jmin = ili + 1; jmax = bi[i+1]; 1126 if (jmin < jmax){ 1127 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1128 /* update il and jl for row i */ 1129 il[i] = jmin; 1130 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1131 } 1132 i = nexti; 1133 } 1134 1135 /* shift the diagonals when zero pivot is detected */ 1136 /* compute rs=sum of abs(off-diagonal) */ 1137 rs = 0.0; 1138 jmin = bi[k]+1; 1139 nz = bi[k+1] - jmin; 1140 if (nz){ 1141 bcol = bj + jmin; 1142 while (nz--){ 1143 rs += PetscAbsScalar(rtmp[*bcol]); 1144 bcol++; 1145 } 1146 } 1147 1148 sctx.rs = rs; 1149 sctx.pv = dk; 1150 ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 1151 if (newshift == 1){ 1152 break; /* sctx.shift_amount is updated */ 1153 } else if (newshift == -1){ 1154 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1155 } 1156 1157 /* copy data into U(k,:) */ 1158 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1159 jmin = bi[k]+1; jmax = bi[k+1]; 1160 if (jmin < jmax) { 1161 for (j=jmin; j<jmax; j++){ 1162 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1163 } 1164 /* add the k-th row into il and jl */ 1165 il[k] = jmin; 1166 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1167 } 1168 } 1169 } while (sctx.chshift); 1170 ierr = PetscFree(il);CHKERRQ(ierr); 1171 1172 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1173 C->factor = FACTOR_CHOLESKY; 1174 C->assembled = PETSC_TRUE; 1175 C->preallocated = PETSC_TRUE; 1176 PetscLogFlops(C->m); 1177 if (sctx.nshift){ 1178 if (shiftnz) { 1179 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1180 } else if (shiftpd) { 1181 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1182 } 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1189 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1190 { 1191 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1192 Mat_SeqSBAIJ *b; 1193 Mat B; 1194 PetscErrorCode ierr; 1195 PetscTruth perm_identity; 1196 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1197 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1198 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1199 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1200 PetscReal fill=info->fill,levels=info->levels; 1201 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1202 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1203 PetscBT lnkbt; 1204 1205 PetscFunctionBegin; 1206 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1207 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1208 1209 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1210 ui[0] = 0; 1211 1212 /* special case that simply copies fill pattern */ 1213 if (!levels && perm_identity) { 1214 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1215 for (i=0; i<am; i++) { 1216 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1217 } 1218 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1219 cols = uj; 1220 for (i=0; i<am; i++) { 1221 aj = a->j + a->diag[i]; 1222 ncols = ui[i+1] - ui[i]; 1223 for (j=0; j<ncols; j++) *cols++ = *aj++; 1224 } 1225 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1226 /* initialization */ 1227 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1228 1229 /* jl: linked list for storing indices of the pivot rows 1230 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1231 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1232 il = jl + am; 1233 uj_ptr = (PetscInt**)(il + am); 1234 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1235 for (i=0; i<am; i++){ 1236 jl[i] = am; il[i] = 0; 1237 } 1238 1239 /* create and initialize a linked list for storing column indices of the active row k */ 1240 nlnk = am + 1; 1241 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1242 1243 /* initial FreeSpace size is fill*(ai[am]+1) */ 1244 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1245 current_space = free_space; 1246 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1247 current_space_lvl = free_space_lvl; 1248 1249 for (k=0; k<am; k++){ /* for each active row k */ 1250 /* initialize lnk by the column indices of row rip[k] of A */ 1251 nzk = 0; 1252 ncols = ai[rip[k]+1] - ai[rip[k]]; 1253 ncols_upper = 0; 1254 for (j=0; j<ncols; j++){ 1255 i = *(aj + ai[rip[k]] + j); 1256 if (rip[i] >= k){ /* only take upper triangular entry */ 1257 ajtmp[ncols_upper] = i; 1258 ncols_upper++; 1259 } 1260 } 1261 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1262 nzk += nlnk; 1263 1264 /* update lnk by computing fill-in for each pivot row to be merged in */ 1265 prow = jl[k]; /* 1st pivot row */ 1266 1267 while (prow < k){ 1268 nextprow = jl[prow]; 1269 1270 /* merge prow into k-th row */ 1271 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1272 jmax = ui[prow+1]; 1273 ncols = jmax-jmin; 1274 i = jmin - ui[prow]; 1275 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1276 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1277 j = *(uj - 1); 1278 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1279 nzk += nlnk; 1280 1281 /* update il and jl for prow */ 1282 if (jmin < jmax){ 1283 il[prow] = jmin; 1284 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1285 } 1286 prow = nextprow; 1287 } 1288 1289 /* if free space is not available, make more free space */ 1290 if (current_space->local_remaining<nzk) { 1291 i = am - k + 1; /* num of unfactored rows */ 1292 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1293 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1294 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1295 reallocs++; 1296 } 1297 1298 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1299 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1300 1301 /* add the k-th row into il and jl */ 1302 if (nzk > 1){ 1303 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1304 jl[k] = jl[i]; jl[i] = k; 1305 il[k] = ui[k] + 1; 1306 } 1307 uj_ptr[k] = current_space->array; 1308 uj_lvl_ptr[k] = current_space_lvl->array; 1309 1310 current_space->array += nzk; 1311 current_space->local_used += nzk; 1312 current_space->local_remaining -= nzk; 1313 1314 current_space_lvl->array += nzk; 1315 current_space_lvl->local_used += nzk; 1316 current_space_lvl->local_remaining -= nzk; 1317 1318 ui[k+1] = ui[k] + nzk; 1319 } 1320 1321 if (ai[am] != 0) { 1322 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1323 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1324 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1325 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1326 } else { 1327 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1328 } 1329 1330 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1331 ierr = PetscFree(jl);CHKERRQ(ierr); 1332 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1333 1334 /* destroy list of free space and other temporary array(s) */ 1335 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1336 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1337 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1338 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1339 1340 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1341 1342 /* put together the new matrix in MATSEQSBAIJ format */ 1343 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1344 B = *fact; 1345 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1346 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1347 1348 b = (Mat_SeqSBAIJ*)B->data; 1349 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1350 b->singlemalloc = PETSC_FALSE; 1351 /* the next line frees the default space generated by the Create() */ 1352 ierr = PetscFree(b->a);CHKERRQ(ierr); 1353 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1354 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1355 b->j = uj; 1356 b->i = ui; 1357 b->diag = 0; 1358 b->ilen = 0; 1359 b->imax = 0; 1360 b->row = perm; 1361 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1362 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1363 b->icol = perm; 1364 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1365 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1366 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1367 b->maxnz = b->nz = ui[am]; 1368 1369 B->factor = FACTOR_CHOLESKY; 1370 B->info.factor_mallocs = reallocs; 1371 B->info.fill_ratio_given = fill; 1372 if (ai[am] != 0) { 1373 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1374 } else { 1375 B->info.fill_ratio_needed = 0.0; 1376 } 1377 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1378 if (perm_identity){ 1379 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1380 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1387 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1388 { 1389 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1390 Mat_SeqSBAIJ *b; 1391 Mat B; 1392 PetscErrorCode ierr; 1393 PetscTruth perm_identity; 1394 PetscReal fill = info->fill; 1395 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1396 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1397 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1398 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1399 PetscBT lnkbt; 1400 IS iperm; 1401 1402 PetscFunctionBegin; 1403 /* check whether perm is the identity mapping */ 1404 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1405 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1406 1407 if (!perm_identity){ 1408 /* check if perm is symmetric! */ 1409 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1410 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1411 for (i=0; i<am; i++) { 1412 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1413 } 1414 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1415 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1416 } 1417 1418 /* initialization */ 1419 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1420 ui[0] = 0; 1421 1422 /* jl: linked list for storing indices of the pivot rows 1423 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1424 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1425 il = jl + am; 1426 cols = il + am; 1427 ui_ptr = (PetscInt**)(cols + am); 1428 for (i=0; i<am; i++){ 1429 jl[i] = am; il[i] = 0; 1430 } 1431 1432 /* create and initialize a linked list for storing column indices of the active row k */ 1433 nlnk = am + 1; 1434 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1435 1436 /* initial FreeSpace size is fill*(ai[am]+1) */ 1437 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1438 current_space = free_space; 1439 1440 for (k=0; k<am; k++){ /* for each active row k */ 1441 /* initialize lnk by the column indices of row rip[k] of A */ 1442 nzk = 0; 1443 ncols = ai[rip[k]+1] - ai[rip[k]]; 1444 ncols_upper = 0; 1445 for (j=0; j<ncols; j++){ 1446 i = rip[*(aj + ai[rip[k]] + j)]; 1447 if (i >= k){ /* only take upper triangular entry */ 1448 cols[ncols_upper] = i; 1449 ncols_upper++; 1450 } 1451 } 1452 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1453 nzk += nlnk; 1454 1455 /* update lnk by computing fill-in for each pivot row to be merged in */ 1456 prow = jl[k]; /* 1st pivot row */ 1457 1458 while (prow < k){ 1459 nextprow = jl[prow]; 1460 /* merge prow into k-th row */ 1461 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1462 jmax = ui[prow+1]; 1463 ncols = jmax-jmin; 1464 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1465 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1466 nzk += nlnk; 1467 1468 /* update il and jl for prow */ 1469 if (jmin < jmax){ 1470 il[prow] = jmin; 1471 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1472 } 1473 prow = nextprow; 1474 } 1475 1476 /* if free space is not available, make more free space */ 1477 if (current_space->local_remaining<nzk) { 1478 i = am - k + 1; /* num of unfactored rows */ 1479 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1480 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1481 reallocs++; 1482 } 1483 1484 /* copy data into free space, then initialize lnk */ 1485 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1486 1487 /* add the k-th row into il and jl */ 1488 if (nzk-1 > 0){ 1489 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1490 jl[k] = jl[i]; jl[i] = k; 1491 il[k] = ui[k] + 1; 1492 } 1493 ui_ptr[k] = current_space->array; 1494 current_space->array += nzk; 1495 current_space->local_used += nzk; 1496 current_space->local_remaining -= nzk; 1497 1498 ui[k+1] = ui[k] + nzk; 1499 } 1500 1501 if (ai[am] != 0) { 1502 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1503 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1504 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1505 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1506 } else { 1507 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1508 } 1509 1510 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1511 ierr = PetscFree(jl);CHKERRQ(ierr); 1512 1513 /* destroy list of free space and other temporary array(s) */ 1514 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1515 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1516 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1517 1518 /* put together the new matrix in MATSEQSBAIJ format */ 1519 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1520 B = *fact; 1521 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1522 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1523 1524 b = (Mat_SeqSBAIJ*)B->data; 1525 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1526 b->singlemalloc = PETSC_FALSE; 1527 /* the next line frees the default space generated by the Create() */ 1528 ierr = PetscFree(b->a);CHKERRQ(ierr); 1529 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1530 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1531 b->j = uj; 1532 b->i = ui; 1533 b->diag = 0; 1534 b->ilen = 0; 1535 b->imax = 0; 1536 b->row = perm; 1537 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1538 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1539 b->icol = perm; 1540 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1541 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1542 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1543 b->maxnz = b->nz = ui[am]; 1544 1545 B->factor = FACTOR_CHOLESKY; 1546 B->info.factor_mallocs = reallocs; 1547 B->info.fill_ratio_given = fill; 1548 if (ai[am] != 0) { 1549 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1550 } else { 1551 B->info.fill_ratio_needed = 0.0; 1552 } 1553 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1554 if (perm_identity){ 1555 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1556 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1557 } 1558 PetscFunctionReturn(0); 1559 } 1560