1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 21 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 22 23 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 24 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 25 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 int *c,*r,*ic,ierr,i,n = A->m; 62 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 int *ordcol,*iwk,*iperm,*jw; 64 int jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal permtol,af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (int)(info->dtcount/2.0); 75 jmax = (int)(info->fill*a->nz); 76 permtol = info->dtcol; 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 222 (*fact)->factor = FACTOR_LU; 223 (*fact)->assembled = PETSC_TRUE; 224 225 b = (Mat_SeqAIJ*)(*fact)->data; 226 ierr = PetscFree(b->imax);CHKERRQ(ierr); 227 b->sorted = PETSC_FALSE; 228 b->singlemalloc = PETSC_FALSE; 229 /* the next line frees the default space generated by the MatCreate() */ 230 ierr = PetscFree(b->a);CHKERRQ(ierr); 231 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 246 247 /* check out for identical nodes. If found, use inode functions */ 248 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 255 256 PetscFunctionReturn(0); 257 #endif 258 } 259 260 /* 261 Factorization code for AIJ format. 262 */ 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 265 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 266 { 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 268 IS isicol; 269 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 270 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 271 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 272 PetscReal f; 273 274 PetscFunctionBegin; 275 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 276 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279 280 /* get new row pointers */ 281 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 282 ainew[0] = 0; 283 /* don't know how many column pointers are needed so estimate */ 284 f = info->fill; 285 jmax = (int)(f*ai[n]+1); 286 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 287 /* fill is a linked list of nonzeros in active row */ 288 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 289 im = fill + n + 1; 290 /* idnew is location of diagonal in factor */ 291 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 292 idnew[0] = 0; 293 294 for (i=0; i<n; i++) { 295 /* first copy previous fill into linked list */ 296 nnz = nz = ai[r[i]+1] - ai[r[i]]; 297 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 298 ajtmp = aj + ai[r[i]]; 299 fill[n] = n; 300 while (nz--) { 301 fm = n; 302 idx = ic[*ajtmp++]; 303 do { 304 m = fm; 305 fm = fill[m]; 306 } while (fm < idx); 307 fill[m] = idx; 308 fill[idx] = fm; 309 } 310 row = fill[n]; 311 while (row < i) { 312 ajtmp = ajnew + idnew[row] + 1; 313 nzbd = 1 + idnew[row] - ainew[row]; 314 nz = im[row] - nzbd; 315 fm = row; 316 while (nz-- > 0) { 317 idx = *ajtmp++ ; 318 nzbd++; 319 if (idx == i) im[row] = nzbd; 320 do { 321 m = fm; 322 fm = fill[m]; 323 } while (fm < idx); 324 if (fm != idx) { 325 fill[m] = idx; 326 fill[idx] = fm; 327 fm = idx; 328 nnz++; 329 } 330 } 331 row = fill[row]; 332 } 333 /* copy new filled row into permanent storage */ 334 ainew[i+1] = ainew[i] + nnz; 335 if (ainew[i+1] > jmax) { 336 337 /* estimate how much additional space we will need */ 338 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 339 /* just double the memory each time */ 340 int maxadd = jmax; 341 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 342 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 343 jmax += maxadd; 344 345 /* allocate a longer ajnew */ 346 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 347 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr); 348 ierr = PetscFree(ajnew);CHKERRQ(ierr); 349 ajnew = ajtmp; 350 realloc++; /* count how many times we realloc */ 351 } 352 ajtmp = ajnew + ainew[i]; 353 fm = fill[n]; 354 nzi = 0; 355 im[i] = nnz; 356 while (nnz--) { 357 if (fm < i) nzi++; 358 *ajtmp++ = fm ; 359 fm = fill[fm]; 360 } 361 idnew[i] = ainew[i] + nzi; 362 } 363 if (ai[n] != 0) { 364 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 365 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 369 } else { 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 371 } 372 373 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 374 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 375 376 ierr = PetscFree(fill);CHKERRQ(ierr); 377 378 /* put together the new matrix */ 379 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 380 PetscLogObjectParent(*B,isicol); 381 b = (Mat_SeqAIJ*)(*B)->data; 382 ierr = PetscFree(b->imax);CHKERRQ(ierr); 383 b->singlemalloc = PETSC_FALSE; 384 /* the next line frees the default space generated by the Create() */ 385 ierr = PetscFree(b->a);CHKERRQ(ierr); 386 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 387 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 388 b->j = ajnew; 389 b->i = ainew; 390 b->diag = idnew; 391 b->ilen = 0; 392 b->imax = 0; 393 b->row = isrow; 394 b->col = iscol; 395 b->lu_damping = info->damping; 396 b->lu_zeropivot = info->zeropivot; 397 b->lu_shift = info->shift; 398 b->lu_shift_fraction = info->shift_fraction; 399 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 400 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 401 b->icol = isicol; 402 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 403 /* In b structure: Free imax, ilen, old a, old j. 404 Allocate idnew, solve_work, new a, new j */ 405 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar))); 406 b->maxnz = b->nz = ainew[n] ; 407 408 (*B)->factor = FACTOR_LU; 409 (*B)->info.factor_mallocs = realloc; 410 (*B)->info.fill_ratio_given = f; 411 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 412 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 413 414 if (ai[n] != 0) { 415 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 416 } else { 417 (*B)->info.fill_ratio_needed = 0.0; 418 } 419 PetscFunctionReturn(0); 420 } 421 /* ----------------------------------------------------------- */ 422 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 426 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 427 { 428 Mat C = *B; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 430 IS isrow = b->row,isicol = b->icol; 431 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 432 int *ajtmpold,*ajtmp,nz,row,*ics; 433 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 434 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 435 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 436 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 437 PetscTruth damp,lushift; 438 439 PetscFunctionBegin; 440 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 441 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 442 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 443 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 444 rtmps = rtmp; ics = ic; 445 446 if (!a->diag) { 447 ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr); 448 } 449 450 if (b->lu_shift) { /* set max shift */ 451 int *aai = a->i,*ddiag = a->diag; 452 shift_top = 0; 453 for (i=0; i<n; i++) { 454 d = PetscAbsScalar((a->a)[ddiag[i]]); 455 /* calculate amt of shift needed for this row */ 456 if (d<=0) { 457 row_shift = 0; 458 } else { 459 row_shift = -2*d; 460 } 461 v = a->a+aai[i]; 462 for (j=0; j<aai[i+1]-aai[i]; j++) 463 row_shift += PetscAbsScalar(v[j]); 464 if (row_shift>shift_top) shift_top = row_shift; 465 } 466 } 467 468 shift_fraction = 0; shift_amount = 0; 469 do { 470 damp = PETSC_FALSE; 471 lushift = PETSC_FALSE; 472 for (i=0; i<n; i++) { 473 nz = ai[i+1] - ai[i]; 474 ajtmp = aj + ai[i]; 475 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 476 477 /* load in initial (unfactored row) */ 478 nz = a->i[r[i]+1] - a->i[r[i]]; 479 ajtmpold = a->j + a->i[r[i]]; 480 v = a->a + a->i[r[i]]; 481 for (j=0; j<nz; j++) { 482 rtmp[ics[ajtmpold[j]]] = v[j]; 483 } 484 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 485 486 row = *ajtmp++ ; 487 while (row < i) { 488 pc = rtmp + row; 489 if (*pc != 0.0) { 490 pv = b->a + diag_offset[row] ; 491 pj = b->j + diag_offset[row] + 1; 492 multiplier = *pc / *pv++; 493 *pc = multiplier; 494 nz = ai[row+1] - diag_offset[row] - 1; 495 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 496 PetscLogFlops(2*nz); 497 } 498 row = *ajtmp++; 499 } 500 /* finished row so stick it into b->a */ 501 pv = b->a + ai[i] ; 502 pj = b->j + ai[i] ; 503 nz = ai[i+1] - ai[i]; 504 diag = diag_offset[i] - ai[i]; 505 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 506 rs = 0.0; 507 for (j=0; j<nz; j++) { 508 pv[j] = rtmps[pj[j]]; 509 if (j != diag) rs += PetscAbsScalar(pv[j]); 510 } 511 #define MAX_NSHIFT 5 512 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 513 if (nshift>MAX_NSHIFT) { 514 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 515 } else if (nshift==MAX_NSHIFT) { 516 shift_fraction = shift_hi; 517 lushift = PETSC_FALSE; 518 } else { 519 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 520 lushift = PETSC_TRUE; 521 } 522 shift_amount = shift_fraction * shift_top; 523 nshift++; 524 break; 525 } 526 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 527 if (damping) { 528 if (ndamp) damping *= 2.0; 529 damp = PETSC_TRUE; 530 ndamp++; 531 break; 532 } else { 533 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 534 } 535 } 536 } 537 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 538 /* 539 * if no shift in this attempt & shifting & started shifting & can refine, 540 * then try lower shift 541 */ 542 shift_hi = shift_fraction; 543 shift_fraction = (shift_hi+shift_lo)/2.; 544 shift_amount = shift_fraction * shift_top; 545 lushift = PETSC_TRUE; 546 nshift++; 547 } 548 } while (damp || lushift); 549 550 /* invert diagonal entries for simplier triangular solves */ 551 for (i=0; i<n; i++) { 552 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 553 } 554 555 ierr = PetscFree(rtmp);CHKERRQ(ierr); 556 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 557 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 558 C->factor = FACTOR_LU; 559 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 560 C->assembled = PETSC_TRUE; 561 PetscLogFlops(C->n); 562 if (ndamp) { 563 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 564 } 565 if (nshift) { 566 b->lu_shift_fraction = shift_fraction; 567 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift); 568 } 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 574 int MatUsePETSc_SeqAIJ(Mat A) 575 { 576 PetscFunctionBegin; 577 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 578 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 579 PetscFunctionReturn(0); 580 } 581 582 583 /* ----------------------------------------------------------- */ 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatLUFactor_SeqAIJ" 586 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 587 { 588 int ierr; 589 Mat C; 590 591 PetscFunctionBegin; 592 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 593 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 594 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 595 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 596 PetscFunctionReturn(0); 597 } 598 /* ----------------------------------------------------------- */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatSolve_SeqAIJ" 601 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 602 { 603 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 604 IS iscol = a->col,isrow = a->row; 605 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 606 int nz,*rout,*cout; 607 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 608 609 PetscFunctionBegin; 610 if (!n) PetscFunctionReturn(0); 611 612 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 613 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 614 tmp = a->solve_work; 615 616 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 617 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 618 619 /* forward solve the lower triangular */ 620 tmp[0] = b[*r++]; 621 tmps = tmp; 622 for (i=1; i<n; i++) { 623 v = aa + ai[i] ; 624 vi = aj + ai[i] ; 625 nz = a->diag[i] - ai[i]; 626 sum = b[*r++]; 627 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 628 tmp[i] = sum; 629 } 630 631 /* backward solve the upper triangular */ 632 for (i=n-1; i>=0; i--){ 633 v = aa + a->diag[i] + 1; 634 vi = aj + a->diag[i] + 1; 635 nz = ai[i+1] - a->diag[i] - 1; 636 sum = tmp[i]; 637 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 638 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 639 } 640 641 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 642 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 643 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 644 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 645 PetscLogFlops(2*a->nz - A->n); 646 PetscFunctionReturn(0); 647 } 648 649 /* ----------------------------------------------------------- */ 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 652 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 653 { 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 655 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 656 PetscScalar *x,*b,*aa = a->a; 657 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 658 int adiag_i,i,*vi,nz,ai_i; 659 PetscScalar *v,sum; 660 #endif 661 662 PetscFunctionBegin; 663 if (!n) PetscFunctionReturn(0); 664 665 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 666 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 667 668 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 669 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 670 #else 671 /* forward solve the lower triangular */ 672 x[0] = b[0]; 673 for (i=1; i<n; i++) { 674 ai_i = ai[i]; 675 v = aa + ai_i; 676 vi = aj + ai_i; 677 nz = adiag[i] - ai_i; 678 sum = b[i]; 679 while (nz--) sum -= *v++ * x[*vi++]; 680 x[i] = sum; 681 } 682 683 /* backward solve the upper triangular */ 684 for (i=n-1; i>=0; i--){ 685 adiag_i = adiag[i]; 686 v = aa + adiag_i + 1; 687 vi = aj + adiag_i + 1; 688 nz = ai[i+1] - adiag_i - 1; 689 sum = x[i]; 690 while (nz--) sum -= *v++ * x[*vi++]; 691 x[i] = sum*aa[adiag_i]; 692 } 693 #endif 694 PetscLogFlops(2*a->nz - A->n); 695 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 696 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 697 PetscFunctionReturn(0); 698 } 699 700 #undef __FUNCT__ 701 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 702 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 703 { 704 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 705 IS iscol = a->col,isrow = a->row; 706 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 707 int nz,*rout,*cout; 708 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 709 710 PetscFunctionBegin; 711 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 712 713 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 714 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 715 tmp = a->solve_work; 716 717 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 718 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 719 720 /* forward solve the lower triangular */ 721 tmp[0] = b[*r++]; 722 for (i=1; i<n; i++) { 723 v = aa + ai[i] ; 724 vi = aj + ai[i] ; 725 nz = a->diag[i] - ai[i]; 726 sum = b[*r++]; 727 while (nz--) sum -= *v++ * tmp[*vi++ ]; 728 tmp[i] = sum; 729 } 730 731 /* backward solve the upper triangular */ 732 for (i=n-1; i>=0; i--){ 733 v = aa + a->diag[i] + 1; 734 vi = aj + a->diag[i] + 1; 735 nz = ai[i+1] - a->diag[i] - 1; 736 sum = tmp[i]; 737 while (nz--) sum -= *v++ * tmp[*vi++ ]; 738 tmp[i] = sum*aa[a->diag[i]]; 739 x[*c--] += tmp[i]; 740 } 741 742 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 744 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 745 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 746 PetscLogFlops(2*a->nz); 747 748 PetscFunctionReturn(0); 749 } 750 /* -------------------------------------------------------------------*/ 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 753 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 754 { 755 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 756 IS iscol = a->col,isrow = a->row; 757 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 758 int nz,*rout,*cout,*diag = a->diag; 759 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 760 761 PetscFunctionBegin; 762 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 763 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 764 tmp = a->solve_work; 765 766 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 767 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 768 769 /* copy the b into temp work space according to permutation */ 770 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 771 772 /* forward solve the U^T */ 773 for (i=0; i<n; i++) { 774 v = aa + diag[i] ; 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = tmp[i]; 778 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 779 while (nz--) { 780 tmp[*vi++ ] -= (*v++)*s1; 781 } 782 tmp[i] = s1; 783 } 784 785 /* backward solve the L^T */ 786 for (i=n-1; i>=0; i--){ 787 v = aa + diag[i] - 1 ; 788 vi = aj + diag[i] - 1 ; 789 nz = diag[i] - ai[i]; 790 s1 = tmp[i]; 791 while (nz--) { 792 tmp[*vi-- ] -= (*v--)*s1; 793 } 794 } 795 796 /* copy tmp into x according to permutation */ 797 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 798 799 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 800 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 801 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 802 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 803 804 PetscLogFlops(2*a->nz-A->n); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 810 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 811 { 812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 813 IS iscol = a->col,isrow = a->row; 814 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 815 int nz,*rout,*cout,*diag = a->diag; 816 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 817 818 PetscFunctionBegin; 819 if (zz != xx) VecCopy(zz,xx); 820 821 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 822 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 823 tmp = a->solve_work; 824 825 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 826 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 827 828 /* copy the b into temp work space according to permutation */ 829 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 830 831 /* forward solve the U^T */ 832 for (i=0; i<n; i++) { 833 v = aa + diag[i] ; 834 vi = aj + diag[i] + 1; 835 nz = ai[i+1] - diag[i] - 1; 836 tmp[i] *= *v++; 837 while (nz--) { 838 tmp[*vi++ ] -= (*v++)*tmp[i]; 839 } 840 } 841 842 /* backward solve the L^T */ 843 for (i=n-1; i>=0; i--){ 844 v = aa + diag[i] - 1 ; 845 vi = aj + diag[i] - 1 ; 846 nz = diag[i] - ai[i]; 847 while (nz--) { 848 tmp[*vi-- ] -= (*v--)*tmp[i]; 849 } 850 } 851 852 /* copy tmp into x according to permutation */ 853 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 854 855 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 856 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 857 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 858 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 859 860 PetscLogFlops(2*a->nz); 861 PetscFunctionReturn(0); 862 } 863 /* ----------------------------------------------------------------*/ 864 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 868 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 869 { 870 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 871 IS isicol; 872 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 873 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 874 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 875 int incrlev,nnz,i,levels,diagonal_fill; 876 PetscTruth col_identity,row_identity; 877 PetscReal f; 878 879 PetscFunctionBegin; 880 f = info->fill; 881 levels = (int)info->levels; 882 diagonal_fill = (int)info->diagonal_fill; 883 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 884 885 /* special case that simply copies fill pattern */ 886 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 887 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 888 if (!levels && row_identity && col_identity) { 889 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 890 (*fact)->factor = FACTOR_LU; 891 b = (Mat_SeqAIJ*)(*fact)->data; 892 if (!b->diag) { 893 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 894 } 895 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 896 b->row = isrow; 897 b->col = iscol; 898 b->icol = isicol; 899 b->lu_damping = info->damping; 900 b->lu_zeropivot = info->zeropivot; 901 b->lu_shift = info->shift; 902 b->lu_shift_fraction= info->shift_fraction; 903 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 904 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 905 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 906 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 911 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 912 913 /* get new row pointers */ 914 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 915 ainew[0] = 0; 916 /* don't know how many column pointers are needed so estimate */ 917 jmax = (int)(f*(ai[n]+1)); 918 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 919 /* ajfill is level of fill for each fill entry */ 920 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 921 /* fill is a linked list of nonzeros in active row */ 922 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 923 /* im is level for each filled value */ 924 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 925 /* dloc is location of diagonal in factor */ 926 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 927 dloc[0] = 0; 928 for (prow=0; prow<n; prow++) { 929 930 /* copy current row into linked list */ 931 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 932 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 933 xi = aj + ai[r[prow]] ; 934 fill[n] = n; 935 fill[prow] = -1; /* marker to indicate if diagonal exists */ 936 while (nz--) { 937 fm = n; 938 idx = ic[*xi++ ]; 939 do { 940 m = fm; 941 fm = fill[m]; 942 } while (fm < idx); 943 fill[m] = idx; 944 fill[idx] = fm; 945 im[idx] = 0; 946 } 947 948 /* make sure diagonal entry is included */ 949 if (diagonal_fill && fill[prow] == -1) { 950 fm = n; 951 while (fill[fm] < prow) fm = fill[fm]; 952 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 953 fill[fm] = prow; 954 im[prow] = 0; 955 nzf++; 956 dcount++; 957 } 958 959 nzi = 0; 960 row = fill[n]; 961 while (row < prow) { 962 incrlev = im[row] + 1; 963 nz = dloc[row]; 964 xi = ajnew + ainew[row] + nz + 1; 965 flev = ajfill + ainew[row] + nz + 1; 966 nnz = ainew[row+1] - ainew[row] - nz - 1; 967 fm = row; 968 while (nnz-- > 0) { 969 idx = *xi++ ; 970 if (*flev + incrlev > levels) { 971 flev++; 972 continue; 973 } 974 do { 975 m = fm; 976 fm = fill[m]; 977 } while (fm < idx); 978 if (fm != idx) { 979 im[idx] = *flev + incrlev; 980 fill[m] = idx; 981 fill[idx] = fm; 982 fm = idx; 983 nzf++; 984 } else { 985 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 986 } 987 flev++; 988 } 989 row = fill[row]; 990 nzi++; 991 } 992 /* copy new filled row into permanent storage */ 993 ainew[prow+1] = ainew[prow] + nzf; 994 if (ainew[prow+1] > jmax) { 995 996 /* estimate how much additional space we will need */ 997 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 998 /* just double the memory each time */ 999 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1000 int maxadd = jmax; 1001 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1002 jmax += maxadd; 1003 1004 /* allocate a longer ajnew and ajfill */ 1005 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1006 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1007 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1008 ajnew = xi; 1009 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1010 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1011 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1012 ajfill = xi; 1013 realloc++; /* count how many times we realloc */ 1014 } 1015 xi = ajnew + ainew[prow] ; 1016 flev = ajfill + ainew[prow] ; 1017 dloc[prow] = nzi; 1018 fm = fill[n]; 1019 while (nzf--) { 1020 *xi++ = fm ; 1021 *flev++ = im[fm]; 1022 fm = fill[fm]; 1023 } 1024 /* make sure row has diagonal entry */ 1025 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1026 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1027 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1028 } 1029 } 1030 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1031 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1033 ierr = PetscFree(fill);CHKERRQ(ierr); 1034 ierr = PetscFree(im);CHKERRQ(ierr); 1035 1036 { 1037 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1038 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1039 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1042 if (diagonal_fill) { 1043 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1044 } 1045 } 1046 1047 /* put together the new matrix */ 1048 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1049 PetscLogObjectParent(*fact,isicol); 1050 b = (Mat_SeqAIJ*)(*fact)->data; 1051 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1052 b->singlemalloc = PETSC_FALSE; 1053 len = (ainew[n] )*sizeof(PetscScalar); 1054 /* the next line frees the default space generated by the Create() */ 1055 ierr = PetscFree(b->a);CHKERRQ(ierr); 1056 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1057 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1058 b->j = ajnew; 1059 b->i = ainew; 1060 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1061 b->diag = dloc; 1062 b->ilen = 0; 1063 b->imax = 0; 1064 b->row = isrow; 1065 b->col = iscol; 1066 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1067 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1068 b->icol = isicol; 1069 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1070 /* In b structure: Free imax, ilen, old a, old j. 1071 Allocate dloc, solve_work, new a, new j */ 1072 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar))); 1073 b->maxnz = b->nz = ainew[n] ; 1074 b->lu_damping = info->damping; 1075 b->lu_shift = info->shift; 1076 b->lu_shift_fraction = info->shift_fraction; 1077 b->lu_zeropivot = info->zeropivot; 1078 (*fact)->factor = FACTOR_LU; 1079 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1080 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1081 1082 (*fact)->info.factor_mallocs = realloc; 1083 (*fact)->info.fill_ratio_given = f; 1084 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #include "src/mat/impls/sbaij/seq/sbaij.h" 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1091 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1092 { 1093 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1094 int ierr; 1095 1096 PetscFunctionBegin; 1097 if (!a->sbaijMat){ 1098 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1099 } 1100 1101 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1102 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1103 a->sbaijMat = PETSC_NULL; 1104 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1110 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1111 { 1112 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1113 int ierr; 1114 PetscTruth perm_identity; 1115 1116 PetscFunctionBegin; 1117 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1118 if (!perm_identity){ 1119 SETERRQ(1,"Non-identity permutation is not supported yet"); 1120 } 1121 if (!a->sbaijMat){ 1122 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1123 } 1124 1125 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1126 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1127 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1133 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1134 { 1135 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1136 int ierr; 1137 PetscTruth perm_identity; 1138 1139 PetscFunctionBegin; 1140 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1141 if (!perm_identity){ 1142 SETERRQ(1,"Non-identity permutation is not supported yet"); 1143 } 1144 if (!a->sbaijMat){ 1145 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1146 } 1147 1148 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1149 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1150 1151 PetscFunctionReturn(0); 1152 } 1153