1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->m; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 252 ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 253 ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 254 ierr = PetscVerboseInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 310 nzi += nlnk; 311 312 /* add pivot rows into linked list */ 313 row = lnk[n]; 314 while (row < i) { 315 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 316 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 317 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 318 nzi += nlnk; 319 row = lnk[row]; 320 } 321 bi[i+1] = bi[i] + nzi; 322 im[i] = nzi; 323 324 /* mark bdiag */ 325 nzbd = 0; 326 nnz = nzi; 327 k = lnk[n]; 328 while (nnz-- && k < i){ 329 nzbd++; 330 k = lnk[k]; 331 } 332 bdiag[i] = bi[i] + nzbd; 333 334 /* if free space is not available, make more free space */ 335 if (current_space->local_remaining<nzi) { 336 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 337 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 338 reallocs++; 339 } 340 341 /* copy data into free space, then initialize lnk */ 342 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 343 bi_ptr[i] = current_space->array; 344 current_space->array += nzi; 345 current_space->local_used += nzi; 346 current_space->local_remaining -= nzi; 347 } 348 #if defined(PETSC_USE_VERBOSE) 349 if (ai[n] != 0) { 350 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 351 ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 352 ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 353 ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 354 ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 355 } else { 356 ierr = PetscVerboseInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 357 } 358 #endif 359 360 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 361 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 362 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 365 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 366 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 367 ierr = PetscFree(im);CHKERRQ(ierr); 368 369 /* put together the new matrix */ 370 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 371 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 372 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 373 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 374 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 375 b = (Mat_SeqAIJ*)(*B)->data; 376 b->freedata = PETSC_TRUE; 377 b->singlemalloc = PETSC_FALSE; 378 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 379 b->j = bj; 380 b->i = bi; 381 b->diag = bdiag; 382 b->ilen = 0; 383 b->imax = 0; 384 b->row = isrow; 385 b->col = iscol; 386 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 387 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 388 b->icol = isicol; 389 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 390 391 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 392 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 393 b->maxnz = b->nz = bi[n] ; 394 395 (*B)->factor = FACTOR_LU; 396 (*B)->info.factor_mallocs = reallocs; 397 (*B)->info.fill_ratio_given = f; 398 399 if (ai[n] != 0) { 400 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 401 } else { 402 (*B)->info.fill_ratio_needed = 0.0; 403 } 404 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 405 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 406 PetscFunctionReturn(0); 407 } 408 409 /* ----------------------------------------------------------- */ 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 412 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 413 { 414 Mat C=*B; 415 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 416 IS isrow = b->row,isicol = b->icol; 417 PetscErrorCode ierr; 418 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 419 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 420 PetscInt *diag_offset = b->diag,diag,*pj; 421 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 422 PetscScalar d; 423 PetscReal rs; 424 LUShift_Ctx sctx; 425 PetscInt newshift; 426 427 PetscFunctionBegin; 428 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 429 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 430 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 431 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 432 rtmps = rtmp; ics = ic; 433 434 if (!a->diag) { 435 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 436 } 437 /* if both shift schemes are chosen by user, only use info->shiftpd */ 438 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 439 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 440 PetscInt *aai = a->i,*ddiag = a->diag; 441 sctx.shift_top = 0; 442 for (i=0; i<n; i++) { 443 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 444 d = (a->a)[ddiag[i]]; 445 rs = -PetscAbsScalar(d) - PetscRealPart(d); 446 v = a->a+aai[i]; 447 nz = aai[i+1] - aai[i]; 448 for (j=0; j<nz; j++) 449 rs += PetscAbsScalar(v[j]); 450 if (rs>sctx.shift_top) sctx.shift_top = rs; 451 } 452 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 453 sctx.shift_top *= 1.1; 454 sctx.nshift_max = 5; 455 sctx.shift_lo = 0.; 456 sctx.shift_hi = 1.; 457 } 458 459 sctx.shift_amount = 0; 460 sctx.nshift = 0; 461 do { 462 sctx.lushift = PETSC_FALSE; 463 for (i=0; i<n; i++){ 464 nz = bi[i+1] - bi[i]; 465 bjtmp = bj + bi[i]; 466 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 467 468 /* load in initial (unfactored row) */ 469 nz = a->i[r[i]+1] - a->i[r[i]]; 470 ajtmp = a->j + a->i[r[i]]; 471 v = a->a + a->i[r[i]]; 472 for (j=0; j<nz; j++) { 473 rtmp[ics[ajtmp[j]]] = v[j]; 474 } 475 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 476 477 row = *bjtmp++; 478 while (row < i) { 479 pc = rtmp + row; 480 if (*pc != 0.0) { 481 pv = b->a + diag_offset[row]; 482 pj = b->j + diag_offset[row] + 1; 483 multiplier = *pc / *pv++; 484 *pc = multiplier; 485 nz = bi[row+1] - diag_offset[row] - 1; 486 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 487 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 488 } 489 row = *bjtmp++; 490 } 491 /* finished row so stick it into b->a */ 492 pv = b->a + bi[i] ; 493 pj = b->j + bi[i] ; 494 nz = bi[i+1] - bi[i]; 495 diag = diag_offset[i] - bi[i]; 496 rs = 0.0; 497 for (j=0; j<nz; j++) { 498 pv[j] = rtmps[pj[j]]; 499 if (j != diag) rs += PetscAbsScalar(pv[j]); 500 } 501 502 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 503 sctx.rs = rs; 504 sctx.pv = pv[diag]; 505 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 506 if (newshift == 1){ 507 break; /* sctx.shift_amount is updated */ 508 } else if (newshift == -1){ 509 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 510 } 511 } 512 513 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 514 /* 515 * if no shift in this attempt & shifting & started shifting & can refine, 516 * then try lower shift 517 */ 518 sctx.shift_hi = info->shift_fraction; 519 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 520 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 521 sctx.lushift = PETSC_TRUE; 522 sctx.nshift++; 523 } 524 } while (sctx.lushift); 525 526 /* invert diagonal entries for simplier triangular solves */ 527 for (i=0; i<n; i++) { 528 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 529 } 530 531 ierr = PetscFree(rtmp);CHKERRQ(ierr); 532 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 533 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 534 C->factor = FACTOR_LU; 535 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 536 C->assembled = PETSC_TRUE; 537 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 538 if (sctx.nshift){ 539 if (info->shiftnz) { 540 ierr = PetscVerboseInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 541 } else if (info->shiftpd) { 542 ierr = PetscVerboseInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 543 } 544 } 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 550 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 551 { 552 PetscFunctionBegin; 553 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 554 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 555 PetscFunctionReturn(0); 556 } 557 558 559 /* ----------------------------------------------------------- */ 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatLUFactor_SeqAIJ" 562 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 563 { 564 PetscErrorCode ierr; 565 Mat C; 566 567 PetscFunctionBegin; 568 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 569 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 570 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 571 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 /* ----------------------------------------------------------- */ 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatSolve_SeqAIJ" 577 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 578 { 579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 580 IS iscol = a->col,isrow = a->row; 581 PetscErrorCode ierr; 582 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 583 PetscInt nz,*rout,*cout; 584 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 585 586 PetscFunctionBegin; 587 if (!n) PetscFunctionReturn(0); 588 589 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 591 tmp = a->solve_work; 592 593 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 594 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 595 596 /* forward solve the lower triangular */ 597 tmp[0] = b[*r++]; 598 tmps = tmp; 599 for (i=1; i<n; i++) { 600 v = aa + ai[i] ; 601 vi = aj + ai[i] ; 602 nz = a->diag[i] - ai[i]; 603 sum = b[*r++]; 604 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 605 tmp[i] = sum; 606 } 607 608 /* backward solve the upper triangular */ 609 for (i=n-1; i>=0; i--){ 610 v = aa + a->diag[i] + 1; 611 vi = aj + a->diag[i] + 1; 612 nz = ai[i+1] - a->diag[i] - 1; 613 sum = tmp[i]; 614 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 615 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 616 } 617 618 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 619 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 620 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 621 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 622 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatMatSolve_SeqAIJ" 628 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 629 { 630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 631 IS iscol = a->col,isrow = a->row; 632 PetscErrorCode ierr; 633 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 634 PetscInt nz,*rout,*cout,neq; 635 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 636 637 PetscFunctionBegin; 638 if (!n) PetscFunctionReturn(0); 639 640 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 641 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 642 643 tmp = a->solve_work; 644 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 645 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 646 647 for (neq=0; neq<n; neq++){ 648 /* forward solve the lower triangular */ 649 tmp[0] = b[r[0]]; 650 tmps = tmp; 651 for (i=1; i<n; i++) { 652 v = aa + ai[i] ; 653 vi = aj + ai[i] ; 654 nz = a->diag[i] - ai[i]; 655 sum = b[r[i]]; 656 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 657 tmp[i] = sum; 658 } 659 /* backward solve the upper triangular */ 660 for (i=n-1; i>=0; i--){ 661 v = aa + a->diag[i] + 1; 662 vi = aj + a->diag[i] + 1; 663 nz = ai[i+1] - a->diag[i] - 1; 664 sum = tmp[i]; 665 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 666 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 667 } 668 669 b += n; 670 x += n; 671 } 672 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 673 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 674 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 675 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 676 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 /* ----------------------------------------------------------- */ 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 683 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 684 { 685 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 686 PetscErrorCode ierr; 687 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 688 PetscScalar *x,*b,*aa = a->a; 689 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 690 PetscInt adiag_i,i,*vi,nz,ai_i; 691 PetscScalar *v,sum; 692 #endif 693 694 PetscFunctionBegin; 695 if (!n) PetscFunctionReturn(0); 696 697 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 698 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 699 700 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 701 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 702 #else 703 /* forward solve the lower triangular */ 704 x[0] = b[0]; 705 for (i=1; i<n; i++) { 706 ai_i = ai[i]; 707 v = aa + ai_i; 708 vi = aj + ai_i; 709 nz = adiag[i] - ai_i; 710 sum = b[i]; 711 while (nz--) sum -= *v++ * x[*vi++]; 712 x[i] = sum; 713 } 714 715 /* backward solve the upper triangular */ 716 for (i=n-1; i>=0; i--){ 717 adiag_i = adiag[i]; 718 v = aa + adiag_i + 1; 719 vi = aj + adiag_i + 1; 720 nz = ai[i+1] - adiag_i - 1; 721 sum = x[i]; 722 while (nz--) sum -= *v++ * x[*vi++]; 723 x[i] = sum*aa[adiag_i]; 724 } 725 #endif 726 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 727 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 728 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 734 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 735 { 736 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 737 IS iscol = a->col,isrow = a->row; 738 PetscErrorCode ierr; 739 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 740 PetscInt nz,*rout,*cout; 741 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 742 743 PetscFunctionBegin; 744 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 745 746 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 747 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 748 tmp = a->solve_work; 749 750 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 751 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 752 753 /* forward solve the lower triangular */ 754 tmp[0] = b[*r++]; 755 for (i=1; i<n; i++) { 756 v = aa + ai[i] ; 757 vi = aj + ai[i] ; 758 nz = a->diag[i] - ai[i]; 759 sum = b[*r++]; 760 while (nz--) sum -= *v++ * tmp[*vi++ ]; 761 tmp[i] = sum; 762 } 763 764 /* backward solve the upper triangular */ 765 for (i=n-1; i>=0; i--){ 766 v = aa + a->diag[i] + 1; 767 vi = aj + a->diag[i] + 1; 768 nz = ai[i+1] - a->diag[i] - 1; 769 sum = tmp[i]; 770 while (nz--) sum -= *v++ * tmp[*vi++ ]; 771 tmp[i] = sum*aa[a->diag[i]]; 772 x[*c--] += tmp[i]; 773 } 774 775 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 776 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 777 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 778 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 779 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 780 781 PetscFunctionReturn(0); 782 } 783 /* -------------------------------------------------------------------*/ 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 786 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 787 { 788 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 789 IS iscol = a->col,isrow = a->row; 790 PetscErrorCode ierr; 791 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 792 PetscInt nz,*rout,*cout,*diag = a->diag; 793 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 794 795 PetscFunctionBegin; 796 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 797 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 798 tmp = a->solve_work; 799 800 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 801 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 802 803 /* copy the b into temp work space according to permutation */ 804 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 805 806 /* forward solve the U^T */ 807 for (i=0; i<n; i++) { 808 v = aa + diag[i] ; 809 vi = aj + diag[i] + 1; 810 nz = ai[i+1] - diag[i] - 1; 811 s1 = tmp[i]; 812 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 813 while (nz--) { 814 tmp[*vi++ ] -= (*v++)*s1; 815 } 816 tmp[i] = s1; 817 } 818 819 /* backward solve the L^T */ 820 for (i=n-1; i>=0; i--){ 821 v = aa + diag[i] - 1 ; 822 vi = aj + diag[i] - 1 ; 823 nz = diag[i] - ai[i]; 824 s1 = tmp[i]; 825 while (nz--) { 826 tmp[*vi-- ] -= (*v--)*s1; 827 } 828 } 829 830 /* copy tmp into x according to permutation */ 831 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 832 833 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 834 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 835 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 836 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 837 838 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 844 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 845 { 846 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 847 IS iscol = a->col,isrow = a->row; 848 PetscErrorCode ierr; 849 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 850 PetscInt nz,*rout,*cout,*diag = a->diag; 851 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 852 853 PetscFunctionBegin; 854 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 855 856 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 857 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 858 tmp = a->solve_work; 859 860 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 861 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 862 863 /* copy the b into temp work space according to permutation */ 864 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 865 866 /* forward solve the U^T */ 867 for (i=0; i<n; i++) { 868 v = aa + diag[i] ; 869 vi = aj + diag[i] + 1; 870 nz = ai[i+1] - diag[i] - 1; 871 tmp[i] *= *v++; 872 while (nz--) { 873 tmp[*vi++ ] -= (*v++)*tmp[i]; 874 } 875 } 876 877 /* backward solve the L^T */ 878 for (i=n-1; i>=0; i--){ 879 v = aa + diag[i] - 1 ; 880 vi = aj + diag[i] - 1 ; 881 nz = diag[i] - ai[i]; 882 while (nz--) { 883 tmp[*vi-- ] -= (*v--)*tmp[i]; 884 } 885 } 886 887 /* copy tmp into x according to permutation */ 888 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 889 890 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 891 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 892 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 893 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 894 895 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } 898 /* ----------------------------------------------------------------*/ 899 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 903 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 904 { 905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 906 IS isicol; 907 PetscErrorCode ierr; 908 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 909 PetscInt *bi,*cols,nnz,*cols_lvl; 910 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 911 PetscInt i,levels,diagonal_fill; 912 PetscTruth col_identity,row_identity; 913 PetscReal f; 914 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 915 PetscBT lnkbt; 916 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 917 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 918 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 919 920 PetscFunctionBegin; 921 f = info->fill; 922 levels = (PetscInt)info->levels; 923 diagonal_fill = (PetscInt)info->diagonal_fill; 924 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 925 926 /* special case that simply copies fill pattern */ 927 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 928 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 929 if (!levels && row_identity && col_identity) { 930 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 931 (*fact)->factor = FACTOR_LU; 932 b = (Mat_SeqAIJ*)(*fact)->data; 933 if (!b->diag) { 934 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 935 } 936 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 937 b->row = isrow; 938 b->col = iscol; 939 b->icol = isicol; 940 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 941 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 942 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 943 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 948 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 949 950 /* get new row pointers */ 951 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 952 bi[0] = 0; 953 /* bdiag is location of diagonal in factor */ 954 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 955 bdiag[0] = 0; 956 957 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 958 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 959 960 /* create a linked list for storing column indices of the active row */ 961 nlnk = n + 1; 962 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 963 964 /* initial FreeSpace size is f*(ai[n]+1) */ 965 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 966 current_space = free_space; 967 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 968 current_space_lvl = free_space_lvl; 969 970 for (i=0; i<n; i++) { 971 nzi = 0; 972 /* copy current row into linked list */ 973 nnz = ai[r[i]+1] - ai[r[i]]; 974 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 975 cols = aj + ai[r[i]]; 976 lnk[i] = -1; /* marker to indicate if diagonal exists */ 977 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 978 nzi += nlnk; 979 980 /* make sure diagonal entry is included */ 981 if (diagonal_fill && lnk[i] == -1) { 982 fm = n; 983 while (lnk[fm] < i) fm = lnk[fm]; 984 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 985 lnk[fm] = i; 986 lnk_lvl[i] = 0; 987 nzi++; dcount++; 988 } 989 990 /* add pivot rows into the active row */ 991 nzbd = 0; 992 prow = lnk[n]; 993 while (prow < i) { 994 nnz = bdiag[prow]; 995 cols = bj_ptr[prow] + nnz + 1; 996 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 997 nnz = bi[prow+1] - bi[prow] - nnz - 1; 998 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 999 nzi += nlnk; 1000 prow = lnk[prow]; 1001 nzbd++; 1002 } 1003 bdiag[i] = nzbd; 1004 bi[i+1] = bi[i] + nzi; 1005 1006 /* if free space is not available, make more free space */ 1007 if (current_space->local_remaining<nzi) { 1008 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1009 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1010 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1011 reallocs++; 1012 } 1013 1014 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1015 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1016 bj_ptr[i] = current_space->array; 1017 bjlvl_ptr[i] = current_space_lvl->array; 1018 1019 /* make sure the active row i has diagonal entry */ 1020 if (*(bj_ptr[i]+bdiag[i]) != i) { 1021 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1022 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 1023 } 1024 1025 current_space->array += nzi; 1026 current_space->local_used += nzi; 1027 current_space->local_remaining -= nzi; 1028 current_space_lvl->array += nzi; 1029 current_space_lvl->local_used += nzi; 1030 current_space_lvl->local_remaining -= nzi; 1031 } 1032 1033 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1034 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1035 1036 /* destroy list of free space and other temporary arrays */ 1037 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1038 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1039 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1040 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1041 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1042 1043 #if defined(PETSC_USE_VERBOSE) 1044 { 1045 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1046 ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 1047 ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 1048 ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1049 ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1050 if (diagonal_fill) { 1051 ierr = PetscVerboseInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1052 } 1053 } 1054 #endif 1055 1056 /* put together the new matrix */ 1057 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1058 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1059 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1060 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1061 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1062 b = (Mat_SeqAIJ*)(*fact)->data; 1063 b->freedata = PETSC_TRUE; 1064 b->singlemalloc = PETSC_FALSE; 1065 len = (bi[n] )*sizeof(PetscScalar); 1066 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1067 b->j = bj; 1068 b->i = bi; 1069 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1070 b->diag = bdiag; 1071 b->ilen = 0; 1072 b->imax = 0; 1073 b->row = isrow; 1074 b->col = iscol; 1075 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1077 b->icol = isicol; 1078 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1079 /* In b structure: Free imax, ilen, old a, old j. 1080 Allocate bdiag, solve_work, new a, new j */ 1081 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1082 b->maxnz = b->nz = bi[n] ; 1083 (*fact)->factor = FACTOR_LU; 1084 (*fact)->info.factor_mallocs = reallocs; 1085 (*fact)->info.fill_ratio_given = f; 1086 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1087 1088 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1089 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1090 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #include "src/mat/impls/sbaij/seq/sbaij.h" 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1097 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1098 { 1099 Mat C = *B; 1100 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1101 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1102 IS ip=b->row; 1103 PetscErrorCode ierr; 1104 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1105 PetscInt *ai=a->i,*aj=a->j; 1106 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1107 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1108 PetscReal zeropivot,rs,shiftnz; 1109 PetscReal shiftpd; 1110 ChShift_Ctx sctx; 1111 PetscInt newshift; 1112 1113 PetscFunctionBegin; 1114 shiftnz = info->shiftnz; 1115 shiftpd = info->shiftpd; 1116 zeropivot = info->zeropivot; 1117 1118 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1119 1120 /* initialization */ 1121 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1122 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1123 jl = il + mbs; 1124 rtmp = (MatScalar*)(jl + mbs); 1125 1126 sctx.shift_amount = 0; 1127 sctx.nshift = 0; 1128 do { 1129 sctx.chshift = PETSC_FALSE; 1130 for (i=0; i<mbs; i++) { 1131 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1132 } 1133 1134 for (k = 0; k<mbs; k++){ 1135 bval = ba + bi[k]; 1136 /* initialize k-th row by the perm[k]-th row of A */ 1137 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1138 for (j = jmin; j < jmax; j++){ 1139 col = rip[aj[j]]; 1140 if (col >= k){ /* only take upper triangular entry */ 1141 rtmp[col] = aa[j]; 1142 *bval++ = 0.0; /* for in-place factorization */ 1143 } 1144 } 1145 /* shift the diagonal of the matrix */ 1146 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1147 1148 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1149 dk = rtmp[k]; 1150 i = jl[k]; /* first row to be added to k_th row */ 1151 1152 while (i < k){ 1153 nexti = jl[i]; /* next row to be added to k_th row */ 1154 1155 /* compute multiplier, update diag(k) and U(i,k) */ 1156 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1157 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1158 dk += uikdi*ba[ili]; 1159 ba[ili] = uikdi; /* -U(i,k) */ 1160 1161 /* add multiple of row i to k-th row */ 1162 jmin = ili + 1; jmax = bi[i+1]; 1163 if (jmin < jmax){ 1164 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1165 /* update il and jl for row i */ 1166 il[i] = jmin; 1167 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1168 } 1169 i = nexti; 1170 } 1171 1172 /* shift the diagonals when zero pivot is detected */ 1173 /* compute rs=sum of abs(off-diagonal) */ 1174 rs = 0.0; 1175 jmin = bi[k]+1; 1176 nz = bi[k+1] - jmin; 1177 if (nz){ 1178 bcol = bj + jmin; 1179 while (nz--){ 1180 rs += PetscAbsScalar(rtmp[*bcol]); 1181 bcol++; 1182 } 1183 } 1184 1185 sctx.rs = rs; 1186 sctx.pv = dk; 1187 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1188 if (newshift == 1){ 1189 break; /* sctx.shift_amount is updated */ 1190 } else if (newshift == -1){ 1191 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1192 } 1193 1194 /* copy data into U(k,:) */ 1195 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1196 jmin = bi[k]+1; jmax = bi[k+1]; 1197 if (jmin < jmax) { 1198 for (j=jmin; j<jmax; j++){ 1199 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1200 } 1201 /* add the k-th row into il and jl */ 1202 il[k] = jmin; 1203 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1204 } 1205 } 1206 } while (sctx.chshift); 1207 ierr = PetscFree(il);CHKERRQ(ierr); 1208 1209 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1210 C->factor = FACTOR_CHOLESKY; 1211 C->assembled = PETSC_TRUE; 1212 C->preallocated = PETSC_TRUE; 1213 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1214 if (sctx.nshift){ 1215 if (shiftnz) { 1216 ierr = PetscVerboseInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1217 } else if (shiftpd) { 1218 ierr = PetscVerboseInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1219 } 1220 } 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1226 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1227 { 1228 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1229 Mat_SeqSBAIJ *b; 1230 Mat B; 1231 PetscErrorCode ierr; 1232 PetscTruth perm_identity; 1233 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1234 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1235 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1236 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1237 PetscReal fill=info->fill,levels=info->levels; 1238 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1239 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1240 PetscBT lnkbt; 1241 1242 PetscFunctionBegin; 1243 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1244 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1245 1246 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1247 ui[0] = 0; 1248 1249 /* special case that simply copies fill pattern */ 1250 if (!levels && perm_identity) { 1251 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1252 for (i=0; i<am; i++) { 1253 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1254 } 1255 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1256 cols = uj; 1257 for (i=0; i<am; i++) { 1258 aj = a->j + a->diag[i]; 1259 ncols = ui[i+1] - ui[i]; 1260 for (j=0; j<ncols; j++) *cols++ = *aj++; 1261 } 1262 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1263 /* initialization */ 1264 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1265 1266 /* jl: linked list for storing indices of the pivot rows 1267 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1268 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1269 il = jl + am; 1270 uj_ptr = (PetscInt**)(il + am); 1271 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1272 for (i=0; i<am; i++){ 1273 jl[i] = am; il[i] = 0; 1274 } 1275 1276 /* create and initialize a linked list for storing column indices of the active row k */ 1277 nlnk = am + 1; 1278 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1279 1280 /* initial FreeSpace size is fill*(ai[am]+1) */ 1281 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1282 current_space = free_space; 1283 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1284 current_space_lvl = free_space_lvl; 1285 1286 for (k=0; k<am; k++){ /* for each active row k */ 1287 /* initialize lnk by the column indices of row rip[k] of A */ 1288 nzk = 0; 1289 ncols = ai[rip[k]+1] - ai[rip[k]]; 1290 ncols_upper = 0; 1291 for (j=0; j<ncols; j++){ 1292 i = *(aj + ai[rip[k]] + j); 1293 if (rip[i] >= k){ /* only take upper triangular entry */ 1294 ajtmp[ncols_upper] = i; 1295 ncols_upper++; 1296 } 1297 } 1298 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1299 nzk += nlnk; 1300 1301 /* update lnk by computing fill-in for each pivot row to be merged in */ 1302 prow = jl[k]; /* 1st pivot row */ 1303 1304 while (prow < k){ 1305 nextprow = jl[prow]; 1306 1307 /* merge prow into k-th row */ 1308 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1309 jmax = ui[prow+1]; 1310 ncols = jmax-jmin; 1311 i = jmin - ui[prow]; 1312 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1313 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1314 j = *(uj - 1); 1315 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1316 nzk += nlnk; 1317 1318 /* update il and jl for prow */ 1319 if (jmin < jmax){ 1320 il[prow] = jmin; 1321 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1322 } 1323 prow = nextprow; 1324 } 1325 1326 /* if free space is not available, make more free space */ 1327 if (current_space->local_remaining<nzk) { 1328 i = am - k + 1; /* num of unfactored rows */ 1329 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1330 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1331 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1332 reallocs++; 1333 } 1334 1335 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1336 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1337 1338 /* add the k-th row into il and jl */ 1339 if (nzk > 1){ 1340 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1341 jl[k] = jl[i]; jl[i] = k; 1342 il[k] = ui[k] + 1; 1343 } 1344 uj_ptr[k] = current_space->array; 1345 uj_lvl_ptr[k] = current_space_lvl->array; 1346 1347 current_space->array += nzk; 1348 current_space->local_used += nzk; 1349 current_space->local_remaining -= nzk; 1350 1351 current_space_lvl->array += nzk; 1352 current_space_lvl->local_used += nzk; 1353 current_space_lvl->local_remaining -= nzk; 1354 1355 ui[k+1] = ui[k] + nzk; 1356 } 1357 1358 #if defined(PETSC_USE_VERBOSE) 1359 if (ai[am] != 0) { 1360 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1361 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1362 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1363 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1364 } else { 1365 ierr = PetscVerboseInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1366 } 1367 #endif 1368 1369 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1370 ierr = PetscFree(jl);CHKERRQ(ierr); 1371 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1372 1373 /* destroy list of free space and other temporary array(s) */ 1374 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1375 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1376 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1377 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1378 1379 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1380 1381 /* put together the new matrix in MATSEQSBAIJ format */ 1382 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1383 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1384 B = *fact; 1385 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1386 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1387 1388 b = (Mat_SeqSBAIJ*)B->data; 1389 b->singlemalloc = PETSC_FALSE; 1390 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1391 b->j = uj; 1392 b->i = ui; 1393 b->diag = 0; 1394 b->ilen = 0; 1395 b->imax = 0; 1396 b->row = perm; 1397 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1398 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1399 b->icol = perm; 1400 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1401 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1402 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1403 b->maxnz = b->nz = ui[am]; 1404 1405 B->factor = FACTOR_CHOLESKY; 1406 B->info.factor_mallocs = reallocs; 1407 B->info.fill_ratio_given = fill; 1408 if (ai[am] != 0) { 1409 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1410 } else { 1411 B->info.fill_ratio_needed = 0.0; 1412 } 1413 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1414 if (perm_identity){ 1415 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1416 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1423 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1424 { 1425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1426 Mat_SeqSBAIJ *b; 1427 Mat B; 1428 PetscErrorCode ierr; 1429 PetscTruth perm_identity; 1430 PetscReal fill = info->fill; 1431 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1432 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1433 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1434 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1435 PetscBT lnkbt; 1436 IS iperm; 1437 1438 PetscFunctionBegin; 1439 /* check whether perm is the identity mapping */ 1440 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1441 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1442 1443 if (!perm_identity){ 1444 /* check if perm is symmetric! */ 1445 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1446 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1447 for (i=0; i<am; i++) { 1448 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1449 } 1450 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1451 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1452 } 1453 1454 /* initialization */ 1455 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1456 ui[0] = 0; 1457 1458 /* jl: linked list for storing indices of the pivot rows 1459 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1460 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1461 il = jl + am; 1462 cols = il + am; 1463 ui_ptr = (PetscInt**)(cols + am); 1464 for (i=0; i<am; i++){ 1465 jl[i] = am; il[i] = 0; 1466 } 1467 1468 /* create and initialize a linked list for storing column indices of the active row k */ 1469 nlnk = am + 1; 1470 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1471 1472 /* initial FreeSpace size is fill*(ai[am]+1) */ 1473 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1474 current_space = free_space; 1475 1476 for (k=0; k<am; k++){ /* for each active row k */ 1477 /* initialize lnk by the column indices of row rip[k] of A */ 1478 nzk = 0; 1479 ncols = ai[rip[k]+1] - ai[rip[k]]; 1480 ncols_upper = 0; 1481 for (j=0; j<ncols; j++){ 1482 i = rip[*(aj + ai[rip[k]] + j)]; 1483 if (i >= k){ /* only take upper triangular entry */ 1484 cols[ncols_upper] = i; 1485 ncols_upper++; 1486 } 1487 } 1488 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1489 nzk += nlnk; 1490 1491 /* update lnk by computing fill-in for each pivot row to be merged in */ 1492 prow = jl[k]; /* 1st pivot row */ 1493 1494 while (prow < k){ 1495 nextprow = jl[prow]; 1496 /* merge prow into k-th row */ 1497 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1498 jmax = ui[prow+1]; 1499 ncols = jmax-jmin; 1500 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1501 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1502 nzk += nlnk; 1503 1504 /* update il and jl for prow */ 1505 if (jmin < jmax){ 1506 il[prow] = jmin; 1507 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1508 } 1509 prow = nextprow; 1510 } 1511 1512 /* if free space is not available, make more free space */ 1513 if (current_space->local_remaining<nzk) { 1514 i = am - k + 1; /* num of unfactored rows */ 1515 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1516 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1517 reallocs++; 1518 } 1519 1520 /* copy data into free space, then initialize lnk */ 1521 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1522 1523 /* add the k-th row into il and jl */ 1524 if (nzk-1 > 0){ 1525 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1526 jl[k] = jl[i]; jl[i] = k; 1527 il[k] = ui[k] + 1; 1528 } 1529 ui_ptr[k] = current_space->array; 1530 current_space->array += nzk; 1531 current_space->local_used += nzk; 1532 current_space->local_remaining -= nzk; 1533 1534 ui[k+1] = ui[k] + nzk; 1535 } 1536 1537 #if defined(PETSC_USE_VERBOSE) 1538 if (ai[am] != 0) { 1539 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1540 ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1541 ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1542 ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1543 } else { 1544 ierr = PetscVerboseInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1545 } 1546 #endif 1547 1548 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1549 ierr = PetscFree(jl);CHKERRQ(ierr); 1550 1551 /* destroy list of free space and other temporary array(s) */ 1552 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1553 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1554 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1555 1556 /* put together the new matrix in MATSEQSBAIJ format */ 1557 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1558 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1559 B = *fact; 1560 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1561 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1562 1563 b = (Mat_SeqSBAIJ*)B->data; 1564 b->singlemalloc = PETSC_FALSE; 1565 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1566 b->j = uj; 1567 b->i = ui; 1568 b->diag = 0; 1569 b->ilen = 0; 1570 b->imax = 0; 1571 b->row = perm; 1572 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1573 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1574 b->icol = perm; 1575 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1576 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1577 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1578 b->maxnz = b->nz = ui[am]; 1579 1580 B->factor = FACTOR_CHOLESKY; 1581 B->info.factor_mallocs = reallocs; 1582 B->info.fill_ratio_given = fill; 1583 if (ai[am] != 0) { 1584 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1585 } else { 1586 B->info.fill_ratio_needed = 0.0; 1587 } 1588 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1589 if (perm_identity){ 1590 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1591 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1592 } 1593 PetscFunctionReturn(0); 1594 } 1595