1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->m; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 254 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 274 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 296 im = cols + n; 297 bi_ptr = (PetscInt**)(im + n); 298 299 /* initial FreeSpace size is f*(ai[n]+1) */ 300 f = info->fill; 301 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 311 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 nzi += nlnk; 313 314 /* add pivot rows into linked list */ 315 row = lnk[n]; 316 while (row < i) { 317 nzbd = bdiag[row] - bi[row] + 1; 318 ajtmp = bi_ptr[row] + nzbd; 319 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 320 im[row] = nzbd; 321 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 322 nzi += nlnk; 323 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 324 325 row = lnk[row]; 326 } 327 328 bi[i+1] = bi[i] + nzi; 329 im[i] = nzi; 330 331 /* mark bdiag */ 332 nzbd = 0; 333 nnz = nzi; 334 k = lnk[n]; 335 while (nnz-- && k < i){ 336 nzbd++; 337 k = lnk[k]; 338 } 339 bdiag[i] = bi[i] + nzbd; 340 341 /* if free space is not available, make more free space */ 342 if (current_space->local_remaining<nzi) { 343 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 344 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 345 reallocs++; 346 } 347 348 /* copy data into free space, then initialize lnk */ 349 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 350 bi_ptr[i] = current_space->array; 351 current_space->array += nzi; 352 current_space->local_used += nzi; 353 current_space->local_remaining -= nzi; 354 } 355 #if defined(PETSC_USE_DEBUG) 356 if (ai[n] != 0) { 357 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 359 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 361 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 362 } else { 363 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 364 } 365 #endif 366 367 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 368 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 369 370 /* destroy list of free space and other temporary array(s) */ 371 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 372 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 374 ierr = PetscFree(cols);CHKERRQ(ierr); 375 376 /* put together the new matrix */ 377 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 378 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 379 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 380 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 381 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 382 b = (Mat_SeqAIJ*)(*B)->data; 383 b->freedata = PETSC_TRUE; 384 b->singlemalloc = PETSC_FALSE; 385 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 386 b->j = bj; 387 b->i = bi; 388 b->diag = bdiag; 389 b->ilen = 0; 390 b->imax = 0; 391 b->row = isrow; 392 b->col = iscol; 393 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 394 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 395 b->icol = isicol; 396 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 397 398 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 399 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 400 b->maxnz = b->nz = bi[n] ; 401 402 (*B)->factor = FACTOR_LU; 403 (*B)->info.factor_mallocs = reallocs; 404 (*B)->info.fill_ratio_given = f; 405 406 if (ai[n] != 0) { 407 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 408 } else { 409 (*B)->info.fill_ratio_needed = 0.0; 410 } 411 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 412 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 413 PetscFunctionReturn(0); 414 } 415 416 /* ----------------------------------------------------------- */ 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 419 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 420 { 421 Mat C=*B; 422 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 423 IS isrow = b->row,isicol = b->icol; 424 PetscErrorCode ierr; 425 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 426 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 427 PetscInt *diag_offset = b->diag,diag,*pj; 428 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 429 PetscScalar d; 430 PetscReal rs; 431 LUShift_Ctx sctx; 432 PetscInt newshift; 433 434 PetscFunctionBegin; 435 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 436 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 437 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 438 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 439 rtmps = rtmp; ics = ic; 440 441 if (!a->diag) { 442 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 443 } 444 /* if both shift schemes are chosen by user, only use info->shiftpd */ 445 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 446 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 447 PetscInt *aai = a->i,*ddiag = a->diag; 448 sctx.shift_top = 0; 449 for (i=0; i<n; i++) { 450 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 451 d = (a->a)[ddiag[i]]; 452 rs = -PetscAbsScalar(d) - PetscRealPart(d); 453 v = a->a+aai[i]; 454 nz = aai[i+1] - aai[i]; 455 for (j=0; j<nz; j++) 456 rs += PetscAbsScalar(v[j]); 457 if (rs>sctx.shift_top) sctx.shift_top = rs; 458 } 459 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 460 sctx.shift_top *= 1.1; 461 sctx.nshift_max = 5; 462 sctx.shift_lo = 0.; 463 sctx.shift_hi = 1.; 464 } 465 466 sctx.shift_amount = 0; 467 sctx.nshift = 0; 468 do { 469 sctx.lushift = PETSC_FALSE; 470 for (i=0; i<n; i++){ 471 nz = bi[i+1] - bi[i]; 472 bjtmp = bj + bi[i]; 473 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 474 475 /* load in initial (unfactored row) */ 476 nz = a->i[r[i]+1] - a->i[r[i]]; 477 ajtmp = a->j + a->i[r[i]]; 478 v = a->a + a->i[r[i]]; 479 for (j=0; j<nz; j++) { 480 rtmp[ics[ajtmp[j]]] = v[j]; 481 } 482 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 483 484 row = *bjtmp++; 485 while (row < i) { 486 pc = rtmp + row; 487 if (*pc != 0.0) { 488 pv = b->a + diag_offset[row]; 489 pj = b->j + diag_offset[row] + 1; 490 multiplier = *pc / *pv++; 491 *pc = multiplier; 492 nz = bi[row+1] - diag_offset[row] - 1; 493 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 494 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 495 } 496 row = *bjtmp++; 497 } 498 /* finished row so stick it into b->a */ 499 pv = b->a + bi[i] ; 500 pj = b->j + bi[i] ; 501 nz = bi[i+1] - bi[i]; 502 diag = diag_offset[i] - bi[i]; 503 rs = 0.0; 504 for (j=0; j<nz; j++) { 505 pv[j] = rtmps[pj[j]]; 506 if (j != diag) rs += PetscAbsScalar(pv[j]); 507 } 508 509 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 510 sctx.rs = rs; 511 sctx.pv = pv[diag]; 512 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 513 if (newshift == 1){ 514 break; /* sctx.shift_amount is updated */ 515 } else if (newshift == -1){ 516 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 517 } 518 } 519 520 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 521 /* 522 * if no shift in this attempt & shifting & started shifting & can refine, 523 * then try lower shift 524 */ 525 sctx.shift_hi = info->shift_fraction; 526 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 527 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 528 sctx.lushift = PETSC_TRUE; 529 sctx.nshift++; 530 } 531 } while (sctx.lushift); 532 533 /* invert diagonal entries for simplier triangular solves */ 534 for (i=0; i<n; i++) { 535 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 536 } 537 538 ierr = PetscFree(rtmp);CHKERRQ(ierr); 539 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 540 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 541 C->factor = FACTOR_LU; 542 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 543 C->assembled = PETSC_TRUE; 544 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 545 if (sctx.nshift){ 546 if (info->shiftnz) { 547 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 548 } else if (info->shiftpd) { 549 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 557 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 558 { 559 PetscFunctionBegin; 560 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 561 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 562 PetscFunctionReturn(0); 563 } 564 565 566 /* ----------------------------------------------------------- */ 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatLUFactor_SeqAIJ" 569 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 570 { 571 PetscErrorCode ierr; 572 Mat C; 573 574 PetscFunctionBegin; 575 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 576 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 577 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 578 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 /* ----------------------------------------------------------- */ 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatSolve_SeqAIJ" 584 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 585 { 586 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 587 IS iscol = a->col,isrow = a->row; 588 PetscErrorCode ierr; 589 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 590 PetscInt nz,*rout,*cout; 591 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 592 593 PetscFunctionBegin; 594 if (!n) PetscFunctionReturn(0); 595 596 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 tmp = a->solve_work; 599 600 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 601 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 602 603 /* forward solve the lower triangular */ 604 tmp[0] = b[*r++]; 605 tmps = tmp; 606 for (i=1; i<n; i++) { 607 v = aa + ai[i] ; 608 vi = aj + ai[i] ; 609 nz = a->diag[i] - ai[i]; 610 sum = b[*r++]; 611 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 612 tmp[i] = sum; 613 } 614 615 /* backward solve the upper triangular */ 616 for (i=n-1; i>=0; i--){ 617 v = aa + a->diag[i] + 1; 618 vi = aj + a->diag[i] + 1; 619 nz = ai[i+1] - a->diag[i] - 1; 620 sum = tmp[i]; 621 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 622 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 623 } 624 625 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 626 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 627 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 628 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 629 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 /* ----------------------------------------------------------- */ 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 636 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 637 { 638 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 639 PetscErrorCode ierr; 640 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 641 PetscScalar *x,*b,*aa = a->a; 642 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 643 PetscInt adiag_i,i,*vi,nz,ai_i; 644 PetscScalar *v,sum; 645 #endif 646 647 PetscFunctionBegin; 648 if (!n) PetscFunctionReturn(0); 649 650 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 651 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 652 653 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 654 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 655 #else 656 /* forward solve the lower triangular */ 657 x[0] = b[0]; 658 for (i=1; i<n; i++) { 659 ai_i = ai[i]; 660 v = aa + ai_i; 661 vi = aj + ai_i; 662 nz = adiag[i] - ai_i; 663 sum = b[i]; 664 while (nz--) sum -= *v++ * x[*vi++]; 665 x[i] = sum; 666 } 667 668 /* backward solve the upper triangular */ 669 for (i=n-1; i>=0; i--){ 670 adiag_i = adiag[i]; 671 v = aa + adiag_i + 1; 672 vi = aj + adiag_i + 1; 673 nz = ai[i+1] - adiag_i - 1; 674 sum = x[i]; 675 while (nz--) sum -= *v++ * x[*vi++]; 676 x[i] = sum*aa[adiag_i]; 677 } 678 #endif 679 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 680 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 687 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 688 { 689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 690 IS iscol = a->col,isrow = a->row; 691 PetscErrorCode ierr; 692 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 693 PetscInt nz,*rout,*cout; 694 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 695 696 PetscFunctionBegin; 697 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 698 699 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 700 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 701 tmp = a->solve_work; 702 703 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 704 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 705 706 /* forward solve the lower triangular */ 707 tmp[0] = b[*r++]; 708 for (i=1; i<n; i++) { 709 v = aa + ai[i] ; 710 vi = aj + ai[i] ; 711 nz = a->diag[i] - ai[i]; 712 sum = b[*r++]; 713 while (nz--) sum -= *v++ * tmp[*vi++ ]; 714 tmp[i] = sum; 715 } 716 717 /* backward solve the upper triangular */ 718 for (i=n-1; i>=0; i--){ 719 v = aa + a->diag[i] + 1; 720 vi = aj + a->diag[i] + 1; 721 nz = ai[i+1] - a->diag[i] - 1; 722 sum = tmp[i]; 723 while (nz--) sum -= *v++ * tmp[*vi++ ]; 724 tmp[i] = sum*aa[a->diag[i]]; 725 x[*c--] += tmp[i]; 726 } 727 728 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 730 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 731 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 732 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 733 734 PetscFunctionReturn(0); 735 } 736 /* -------------------------------------------------------------------*/ 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 739 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 740 { 741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 742 IS iscol = a->col,isrow = a->row; 743 PetscErrorCode ierr; 744 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 745 PetscInt nz,*rout,*cout,*diag = a->diag; 746 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 747 748 PetscFunctionBegin; 749 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 750 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 751 tmp = a->solve_work; 752 753 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 754 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 755 756 /* copy the b into temp work space according to permutation */ 757 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 758 759 /* forward solve the U^T */ 760 for (i=0; i<n; i++) { 761 v = aa + diag[i] ; 762 vi = aj + diag[i] + 1; 763 nz = ai[i+1] - diag[i] - 1; 764 s1 = tmp[i]; 765 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 766 while (nz--) { 767 tmp[*vi++ ] -= (*v++)*s1; 768 } 769 tmp[i] = s1; 770 } 771 772 /* backward solve the L^T */ 773 for (i=n-1; i>=0; i--){ 774 v = aa + diag[i] - 1 ; 775 vi = aj + diag[i] - 1 ; 776 nz = diag[i] - ai[i]; 777 s1 = tmp[i]; 778 while (nz--) { 779 tmp[*vi-- ] -= (*v--)*s1; 780 } 781 } 782 783 /* copy tmp into x according to permutation */ 784 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 785 786 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 787 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 788 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 791 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 797 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 798 { 799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 800 IS iscol = a->col,isrow = a->row; 801 PetscErrorCode ierr; 802 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 803 PetscInt nz,*rout,*cout,*diag = a->diag; 804 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 805 806 PetscFunctionBegin; 807 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 808 809 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 811 tmp = a->solve_work; 812 813 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 814 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 815 816 /* copy the b into temp work space according to permutation */ 817 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 818 819 /* forward solve the U^T */ 820 for (i=0; i<n; i++) { 821 v = aa + diag[i] ; 822 vi = aj + diag[i] + 1; 823 nz = ai[i+1] - diag[i] - 1; 824 tmp[i] *= *v++; 825 while (nz--) { 826 tmp[*vi++ ] -= (*v++)*tmp[i]; 827 } 828 } 829 830 /* backward solve the L^T */ 831 for (i=n-1; i>=0; i--){ 832 v = aa + diag[i] - 1 ; 833 vi = aj + diag[i] - 1 ; 834 nz = diag[i] - ai[i]; 835 while (nz--) { 836 tmp[*vi-- ] -= (*v--)*tmp[i]; 837 } 838 } 839 840 /* copy tmp into x according to permutation */ 841 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 842 843 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 844 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 845 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 848 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 /* ----------------------------------------------------------------*/ 852 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 856 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 859 IS isicol; 860 PetscErrorCode ierr; 861 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 862 PetscInt *bi,*cols,nnz,*cols_lvl; 863 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 864 PetscInt i,levels,diagonal_fill; 865 PetscTruth col_identity,row_identity; 866 PetscReal f; 867 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 868 PetscBT lnkbt; 869 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 870 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 871 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 872 873 PetscFunctionBegin; 874 f = info->fill; 875 levels = (PetscInt)info->levels; 876 diagonal_fill = (PetscInt)info->diagonal_fill; 877 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 878 879 /* special case that simply copies fill pattern */ 880 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 881 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 882 if (!levels && row_identity && col_identity) { 883 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 884 (*fact)->factor = FACTOR_LU; 885 b = (Mat_SeqAIJ*)(*fact)->data; 886 if (!b->diag) { 887 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 888 } 889 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 890 b->row = isrow; 891 b->col = iscol; 892 b->icol = isicol; 893 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 894 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 895 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 896 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 901 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 902 903 /* get new row pointers */ 904 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 905 bi[0] = 0; 906 /* bdiag is location of diagonal in factor */ 907 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 908 bdiag[0] = 0; 909 910 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 911 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 912 913 /* create a linked list for storing column indices of the active row */ 914 nlnk = n + 1; 915 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 916 917 /* initial FreeSpace size is f*(ai[n]+1) */ 918 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 919 current_space = free_space; 920 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 921 current_space_lvl = free_space_lvl; 922 923 for (i=0; i<n; i++) { 924 nzi = 0; 925 /* copy current row into linked list */ 926 nnz = ai[r[i]+1] - ai[r[i]]; 927 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 928 cols = aj + ai[r[i]]; 929 lnk[i] = -1; /* marker to indicate if diagonal exists */ 930 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 931 nzi += nlnk; 932 933 /* make sure diagonal entry is included */ 934 if (diagonal_fill && lnk[i] == -1) { 935 fm = n; 936 while (lnk[fm] < i) fm = lnk[fm]; 937 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 938 lnk[fm] = i; 939 lnk_lvl[i] = 0; 940 nzi++; dcount++; 941 } 942 943 /* add pivot rows into the active row */ 944 nzbd = 0; 945 prow = lnk[n]; 946 while (prow < i) { 947 nnz = bdiag[prow]; 948 cols = bj_ptr[prow] + nnz + 1; 949 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 950 nnz = bi[prow+1] - bi[prow] - nnz - 1; 951 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 952 nzi += nlnk; 953 prow = lnk[prow]; 954 nzbd++; 955 } 956 bdiag[i] = nzbd; 957 bi[i+1] = bi[i] + nzi; 958 959 /* if free space is not available, make more free space */ 960 if (current_space->local_remaining<nzi) { 961 nnz = nzi*(n - i); /* estimated and max additional space needed */ 962 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 963 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 964 reallocs++; 965 } 966 967 /* copy data into free_space and free_space_lvl, then initialize lnk */ 968 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 969 bj_ptr[i] = current_space->array; 970 bjlvl_ptr[i] = current_space_lvl->array; 971 972 /* make sure the active row i has diagonal entry */ 973 if (*(bj_ptr[i]+bdiag[i]) != i) { 974 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 975 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 976 } 977 978 current_space->array += nzi; 979 current_space->local_used += nzi; 980 current_space->local_remaining -= nzi; 981 current_space_lvl->array += nzi; 982 current_space_lvl->local_used += nzi; 983 current_space_lvl->local_remaining -= nzi; 984 } 985 986 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 987 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 988 989 /* destroy list of free space and other temporary arrays */ 990 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 991 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 992 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 993 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 994 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 995 996 #if defined(PETSC_USE_DEBUG) 997 { 998 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 999 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 1000 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 1001 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1002 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1003 if (diagonal_fill) { 1004 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1005 } 1006 } 1007 #endif 1008 1009 /* put together the new matrix */ 1010 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1011 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1012 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1013 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1014 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1015 b = (Mat_SeqAIJ*)(*fact)->data; 1016 b->freedata = PETSC_TRUE; 1017 b->singlemalloc = PETSC_FALSE; 1018 len = (bi[n] )*sizeof(PetscScalar); 1019 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1020 b->j = bj; 1021 b->i = bi; 1022 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1023 b->diag = bdiag; 1024 b->ilen = 0; 1025 b->imax = 0; 1026 b->row = isrow; 1027 b->col = iscol; 1028 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1029 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1030 b->icol = isicol; 1031 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1032 /* In b structure: Free imax, ilen, old a, old j. 1033 Allocate bdiag, solve_work, new a, new j */ 1034 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1035 b->maxnz = b->nz = bi[n] ; 1036 (*fact)->factor = FACTOR_LU; 1037 (*fact)->info.factor_mallocs = reallocs; 1038 (*fact)->info.fill_ratio_given = f; 1039 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1040 1041 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1042 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1043 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #include "src/mat/impls/sbaij/seq/sbaij.h" 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1050 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1051 { 1052 Mat C = *B; 1053 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1054 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1055 IS ip=b->row; 1056 PetscErrorCode ierr; 1057 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1058 PetscInt *ai=a->i,*aj=a->j; 1059 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1060 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1061 PetscReal zeropivot,rs,shiftnz; 1062 PetscReal shiftpd; 1063 ChShift_Ctx sctx; 1064 PetscInt newshift; 1065 1066 PetscFunctionBegin; 1067 shiftnz = info->shiftnz; 1068 shiftpd = info->shiftpd; 1069 zeropivot = info->zeropivot; 1070 1071 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1072 1073 /* initialization */ 1074 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1075 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1076 jl = il + mbs; 1077 rtmp = (MatScalar*)(jl + mbs); 1078 1079 sctx.shift_amount = 0; 1080 sctx.nshift = 0; 1081 do { 1082 sctx.chshift = PETSC_FALSE; 1083 for (i=0; i<mbs; i++) { 1084 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1085 } 1086 1087 for (k = 0; k<mbs; k++){ 1088 bval = ba + bi[k]; 1089 /* initialize k-th row by the perm[k]-th row of A */ 1090 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1091 for (j = jmin; j < jmax; j++){ 1092 col = rip[aj[j]]; 1093 if (col >= k){ /* only take upper triangular entry */ 1094 rtmp[col] = aa[j]; 1095 *bval++ = 0.0; /* for in-place factorization */ 1096 } 1097 } 1098 /* shift the diagonal of the matrix */ 1099 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1100 1101 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1102 dk = rtmp[k]; 1103 i = jl[k]; /* first row to be added to k_th row */ 1104 1105 while (i < k){ 1106 nexti = jl[i]; /* next row to be added to k_th row */ 1107 1108 /* compute multiplier, update diag(k) and U(i,k) */ 1109 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1110 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1111 dk += uikdi*ba[ili]; 1112 ba[ili] = uikdi; /* -U(i,k) */ 1113 1114 /* add multiple of row i to k-th row */ 1115 jmin = ili + 1; jmax = bi[i+1]; 1116 if (jmin < jmax){ 1117 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1118 /* update il and jl for row i */ 1119 il[i] = jmin; 1120 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1121 } 1122 i = nexti; 1123 } 1124 1125 /* shift the diagonals when zero pivot is detected */ 1126 /* compute rs=sum of abs(off-diagonal) */ 1127 rs = 0.0; 1128 jmin = bi[k]+1; 1129 nz = bi[k+1] - jmin; 1130 if (nz){ 1131 bcol = bj + jmin; 1132 while (nz--){ 1133 rs += PetscAbsScalar(rtmp[*bcol]); 1134 bcol++; 1135 } 1136 } 1137 1138 sctx.rs = rs; 1139 sctx.pv = dk; 1140 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1141 if (newshift == 1){ 1142 break; /* sctx.shift_amount is updated */ 1143 } else if (newshift == -1){ 1144 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1145 } 1146 1147 /* copy data into U(k,:) */ 1148 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1149 jmin = bi[k]+1; jmax = bi[k+1]; 1150 if (jmin < jmax) { 1151 for (j=jmin; j<jmax; j++){ 1152 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1153 } 1154 /* add the k-th row into il and jl */ 1155 il[k] = jmin; 1156 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1157 } 1158 } 1159 } while (sctx.chshift); 1160 ierr = PetscFree(il);CHKERRQ(ierr); 1161 1162 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1163 C->factor = FACTOR_CHOLESKY; 1164 C->assembled = PETSC_TRUE; 1165 C->preallocated = PETSC_TRUE; 1166 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1167 if (sctx.nshift){ 1168 if (shiftnz) { 1169 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1170 } else if (shiftpd) { 1171 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1172 } 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1179 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1180 { 1181 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1182 Mat_SeqSBAIJ *b; 1183 Mat B; 1184 PetscErrorCode ierr; 1185 PetscTruth perm_identity; 1186 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1187 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1188 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1189 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1190 PetscReal fill=info->fill,levels=info->levels; 1191 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1192 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1193 PetscBT lnkbt; 1194 1195 PetscFunctionBegin; 1196 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1197 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1198 1199 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1200 ui[0] = 0; 1201 1202 /* special case that simply copies fill pattern */ 1203 if (!levels && perm_identity) { 1204 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1205 for (i=0; i<am; i++) { 1206 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1207 } 1208 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1209 cols = uj; 1210 for (i=0; i<am; i++) { 1211 aj = a->j + a->diag[i]; 1212 ncols = ui[i+1] - ui[i]; 1213 for (j=0; j<ncols; j++) *cols++ = *aj++; 1214 } 1215 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1216 /* initialization */ 1217 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1218 1219 /* jl: linked list for storing indices of the pivot rows 1220 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1221 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1222 il = jl + am; 1223 uj_ptr = (PetscInt**)(il + am); 1224 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1225 for (i=0; i<am; i++){ 1226 jl[i] = am; il[i] = 0; 1227 } 1228 1229 /* create and initialize a linked list for storing column indices of the active row k */ 1230 nlnk = am + 1; 1231 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1232 1233 /* initial FreeSpace size is fill*(ai[am]+1) */ 1234 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1235 current_space = free_space; 1236 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1237 current_space_lvl = free_space_lvl; 1238 1239 for (k=0; k<am; k++){ /* for each active row k */ 1240 /* initialize lnk by the column indices of row rip[k] of A */ 1241 nzk = 0; 1242 ncols = ai[rip[k]+1] - ai[rip[k]]; 1243 ncols_upper = 0; 1244 for (j=0; j<ncols; j++){ 1245 i = *(aj + ai[rip[k]] + j); 1246 if (rip[i] >= k){ /* only take upper triangular entry */ 1247 ajtmp[ncols_upper] = i; 1248 ncols_upper++; 1249 } 1250 } 1251 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1252 nzk += nlnk; 1253 1254 /* update lnk by computing fill-in for each pivot row to be merged in */ 1255 prow = jl[k]; /* 1st pivot row */ 1256 1257 while (prow < k){ 1258 nextprow = jl[prow]; 1259 1260 /* merge prow into k-th row */ 1261 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1262 jmax = ui[prow+1]; 1263 ncols = jmax-jmin; 1264 i = jmin - ui[prow]; 1265 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1266 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1267 j = *(uj - 1); 1268 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1269 nzk += nlnk; 1270 1271 /* update il and jl for prow */ 1272 if (jmin < jmax){ 1273 il[prow] = jmin; 1274 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1275 } 1276 prow = nextprow; 1277 } 1278 1279 /* if free space is not available, make more free space */ 1280 if (current_space->local_remaining<nzk) { 1281 i = am - k + 1; /* num of unfactored rows */ 1282 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1283 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1284 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1285 reallocs++; 1286 } 1287 1288 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1289 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1290 1291 /* add the k-th row into il and jl */ 1292 if (nzk > 1){ 1293 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1294 jl[k] = jl[i]; jl[i] = k; 1295 il[k] = ui[k] + 1; 1296 } 1297 uj_ptr[k] = current_space->array; 1298 uj_lvl_ptr[k] = current_space_lvl->array; 1299 1300 current_space->array += nzk; 1301 current_space->local_used += nzk; 1302 current_space->local_remaining -= nzk; 1303 1304 current_space_lvl->array += nzk; 1305 current_space_lvl->local_used += nzk; 1306 current_space_lvl->local_remaining -= nzk; 1307 1308 ui[k+1] = ui[k] + nzk; 1309 } 1310 1311 #if defined(PETSC_USE_DEBUG) 1312 if (ai[am] != 0) { 1313 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1314 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1315 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1316 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1317 } else { 1318 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1319 } 1320 #endif 1321 1322 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1323 ierr = PetscFree(jl);CHKERRQ(ierr); 1324 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1325 1326 /* destroy list of free space and other temporary array(s) */ 1327 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1328 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1329 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1330 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1331 1332 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1333 1334 /* put together the new matrix in MATSEQSBAIJ format */ 1335 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1336 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1337 B = *fact; 1338 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1339 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1340 1341 b = (Mat_SeqSBAIJ*)B->data; 1342 b->singlemalloc = PETSC_FALSE; 1343 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1344 b->j = uj; 1345 b->i = ui; 1346 b->diag = 0; 1347 b->ilen = 0; 1348 b->imax = 0; 1349 b->row = perm; 1350 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1351 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1352 b->icol = perm; 1353 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1354 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1355 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1356 b->maxnz = b->nz = ui[am]; 1357 1358 B->factor = FACTOR_CHOLESKY; 1359 B->info.factor_mallocs = reallocs; 1360 B->info.fill_ratio_given = fill; 1361 if (ai[am] != 0) { 1362 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1363 } else { 1364 B->info.fill_ratio_needed = 0.0; 1365 } 1366 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1367 if (perm_identity){ 1368 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1369 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1376 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1377 { 1378 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1379 Mat_SeqSBAIJ *b; 1380 Mat B; 1381 PetscErrorCode ierr; 1382 PetscTruth perm_identity; 1383 PetscReal fill = info->fill; 1384 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1385 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1386 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1387 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1388 PetscBT lnkbt; 1389 IS iperm; 1390 1391 PetscFunctionBegin; 1392 /* check whether perm is the identity mapping */ 1393 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1394 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1395 1396 if (!perm_identity){ 1397 /* check if perm is symmetric! */ 1398 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1399 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1400 for (i=0; i<am; i++) { 1401 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1402 } 1403 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1404 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1405 } 1406 1407 /* initialization */ 1408 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1409 ui[0] = 0; 1410 1411 /* jl: linked list for storing indices of the pivot rows 1412 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1413 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1414 il = jl + am; 1415 cols = il + am; 1416 ui_ptr = (PetscInt**)(cols + am); 1417 for (i=0; i<am; i++){ 1418 jl[i] = am; il[i] = 0; 1419 } 1420 1421 /* create and initialize a linked list for storing column indices of the active row k */ 1422 nlnk = am + 1; 1423 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1424 1425 /* initial FreeSpace size is fill*(ai[am]+1) */ 1426 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1427 current_space = free_space; 1428 1429 for (k=0; k<am; k++){ /* for each active row k */ 1430 /* initialize lnk by the column indices of row rip[k] of A */ 1431 nzk = 0; 1432 ncols = ai[rip[k]+1] - ai[rip[k]]; 1433 ncols_upper = 0; 1434 for (j=0; j<ncols; j++){ 1435 i = rip[*(aj + ai[rip[k]] + j)]; 1436 if (i >= k){ /* only take upper triangular entry */ 1437 cols[ncols_upper] = i; 1438 ncols_upper++; 1439 } 1440 } 1441 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1442 nzk += nlnk; 1443 1444 /* update lnk by computing fill-in for each pivot row to be merged in */ 1445 prow = jl[k]; /* 1st pivot row */ 1446 1447 while (prow < k){ 1448 nextprow = jl[prow]; 1449 /* merge prow into k-th row */ 1450 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1451 jmax = ui[prow+1]; 1452 ncols = jmax-jmin; 1453 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1454 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1455 nzk += nlnk; 1456 1457 /* update il and jl for prow */ 1458 if (jmin < jmax){ 1459 il[prow] = jmin; 1460 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1461 } 1462 prow = nextprow; 1463 } 1464 1465 /* if free space is not available, make more free space */ 1466 if (current_space->local_remaining<nzk) { 1467 i = am - k + 1; /* num of unfactored rows */ 1468 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1469 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1470 reallocs++; 1471 } 1472 1473 /* copy data into free space, then initialize lnk */ 1474 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1475 1476 /* add the k-th row into il and jl */ 1477 if (nzk-1 > 0){ 1478 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1479 jl[k] = jl[i]; jl[i] = k; 1480 il[k] = ui[k] + 1; 1481 } 1482 ui_ptr[k] = current_space->array; 1483 current_space->array += nzk; 1484 current_space->local_used += nzk; 1485 current_space->local_remaining -= nzk; 1486 1487 ui[k+1] = ui[k] + nzk; 1488 } 1489 1490 #if defined(PETSC_USE_DEBUG) 1491 if (ai[am] != 0) { 1492 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1493 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1494 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1495 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1496 } else { 1497 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1498 } 1499 #endif 1500 1501 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1502 ierr = PetscFree(jl);CHKERRQ(ierr); 1503 1504 /* destroy list of free space and other temporary array(s) */ 1505 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1506 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1507 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1508 1509 /* put together the new matrix in MATSEQSBAIJ format */ 1510 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1511 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1512 B = *fact; 1513 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1514 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1515 1516 b = (Mat_SeqSBAIJ*)B->data; 1517 b->singlemalloc = PETSC_FALSE; 1518 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1519 b->j = uj; 1520 b->i = ui; 1521 b->diag = 0; 1522 b->ilen = 0; 1523 b->imax = 0; 1524 b->row = perm; 1525 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1526 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1527 b->icol = perm; 1528 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1529 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1530 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1531 b->maxnz = b->nz = ui[am]; 1532 1533 B->factor = FACTOR_CHOLESKY; 1534 B->info.factor_mallocs = reallocs; 1535 B->info.fill_ratio_given = fill; 1536 if (ai[am] != 0) { 1537 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1538 } else { 1539 B->info.fill_ratio_needed = 0.0; 1540 } 1541 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1542 if (perm_identity){ 1543 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1544 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1545 } 1546 PetscFunctionReturn(0); 1547 } 1548