1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr; 61 int *c,*r,*ic,i,n = A->m,sierr; 62 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 int *ordcol,*iwk,*iperm,*jw; 64 int jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal permtol,af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (int)(info->dtcount/2.0); 75 jmax = (int)(info->fill*a->nz); 76 permtol = info->dtcol; 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 /* check out for identical nodes. If found, use inode functions */ 250 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 251 252 af = ((double)b->nz)/((double)a->nz) + .001; 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 256 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 /* 263 Factorization code for AIJ format. 264 */ 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 267 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 268 { 269 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 270 IS isicol; 271 PetscErrorCode ierr; 272 int *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 273 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 274 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 275 PetscReal f; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 285 ainew[0] = 0; 286 /* don't know how many column pointers are needed so estimate */ 287 f = info->fill; 288 jmax = (int)(f*ai[n]+1); 289 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 290 /* fill is a linked list of nonzeros in active row */ 291 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 292 im = fill + n + 1; 293 /* idnew is location of diagonal in factor */ 294 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 295 idnew[0] = 0; 296 297 for (i=0; i<n; i++) { 298 /* first copy previous fill into linked list */ 299 nnz = nz = ai[r[i]+1] - ai[r[i]]; 300 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 301 ajtmp = aj + ai[r[i]]; 302 fill[n] = n; 303 while (nz--) { 304 fm = n; 305 idx = ic[*ajtmp++]; 306 do { 307 m = fm; 308 fm = fill[m]; 309 } while (fm < idx); 310 fill[m] = idx; 311 fill[idx] = fm; 312 } 313 row = fill[n]; 314 while (row < i) { 315 ajtmp = ajnew + idnew[row] + 1; 316 nzbd = 1 + idnew[row] - ainew[row]; 317 nz = im[row] - nzbd; 318 fm = row; 319 while (nz-- > 0) { 320 idx = *ajtmp++ ; 321 nzbd++; 322 if (idx == i) im[row] = nzbd; 323 do { 324 m = fm; 325 fm = fill[m]; 326 } while (fm < idx); 327 if (fm != idx) { 328 fill[m] = idx; 329 fill[idx] = fm; 330 fm = idx; 331 nnz++; 332 } 333 } 334 row = fill[row]; 335 } 336 /* copy new filled row into permanent storage */ 337 ainew[i+1] = ainew[i] + nnz; 338 if (ainew[i+1] > jmax) { 339 340 /* estimate how much additional space we will need */ 341 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 342 /* just double the memory each time */ 343 int maxadd = jmax; 344 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 345 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 346 jmax += maxadd; 347 348 /* allocate a longer ajnew */ 349 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 350 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr); 351 ierr = PetscFree(ajnew);CHKERRQ(ierr); 352 ajnew = ajtmp; 353 realloc++; /* count how many times we realloc */ 354 } 355 ajtmp = ajnew + ainew[i]; 356 fm = fill[n]; 357 nzi = 0; 358 im[i] = nnz; 359 while (nnz--) { 360 if (fm < i) nzi++; 361 *ajtmp++ = fm ; 362 fm = fill[fm]; 363 } 364 idnew[i] = ainew[i] + nzi; 365 } 366 if (ai[n] != 0) { 367 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 372 } else { 373 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 374 } 375 376 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 377 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 378 379 ierr = PetscFree(fill);CHKERRQ(ierr); 380 381 /* put together the new matrix */ 382 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 383 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 384 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 385 PetscLogObjectParent(*B,isicol); 386 b = (Mat_SeqAIJ*)(*B)->data; 387 ierr = PetscFree(b->imax);CHKERRQ(ierr); 388 b->singlemalloc = PETSC_FALSE; 389 /* the next line frees the default space generated by the Create() */ 390 ierr = PetscFree(b->a);CHKERRQ(ierr); 391 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 392 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 393 b->j = ajnew; 394 b->i = ainew; 395 b->diag = idnew; 396 b->ilen = 0; 397 b->imax = 0; 398 b->row = isrow; 399 b->col = iscol; 400 b->lu_damping = info->damping; 401 b->lu_zeropivot = info->zeropivot; 402 b->lu_shift = info->shift; 403 b->lu_shift_fraction = info->shift_fraction; 404 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 405 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 406 b->icol = isicol; 407 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 408 /* In b structure: Free imax, ilen, old a, old j. 409 Allocate idnew, solve_work, new a, new j */ 410 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar))); 411 b->maxnz = b->nz = ainew[n] ; 412 413 (*B)->factor = FACTOR_LU; 414 (*B)->info.factor_mallocs = realloc; 415 (*B)->info.fill_ratio_given = f; 416 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 417 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 418 419 if (ai[n] != 0) { 420 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 421 } else { 422 (*B)->info.fill_ratio_needed = 0.0; 423 } 424 PetscFunctionReturn(0); 425 } 426 /* ----------------------------------------------------------- */ 427 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 431 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 432 { 433 Mat C = *B; 434 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 435 IS isrow = b->row,isicol = b->icol; 436 PetscErrorCode ierr; 437 int *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j; 438 int *ajtmpold,*ajtmp,nz,row,*ics; 439 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 440 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 441 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 442 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 443 PetscTruth damp,lushift; 444 445 PetscFunctionBegin; 446 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 447 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 448 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 449 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 450 rtmps = rtmp; ics = ic; 451 452 if (!a->diag) { 453 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 454 } 455 456 if (b->lu_shift) { /* set max shift */ 457 int *aai = a->i,*ddiag = a->diag; 458 shift_top = 0; 459 for (i=0; i<n; i++) { 460 d = PetscAbsScalar((a->a)[ddiag[i]]); 461 /* calculate amt of shift needed for this row */ 462 if (d<=0) { 463 row_shift = 0; 464 } else { 465 row_shift = -2*d; 466 } 467 v = a->a+aai[i]; 468 for (j=0; j<aai[i+1]-aai[i]; j++) 469 row_shift += PetscAbsScalar(v[j]); 470 if (row_shift>shift_top) shift_top = row_shift; 471 } 472 } 473 474 shift_fraction = 0; shift_amount = 0; 475 do { 476 damp = PETSC_FALSE; 477 lushift = PETSC_FALSE; 478 for (i=0; i<n; i++) { 479 nz = ai[i+1] - ai[i]; 480 ajtmp = aj + ai[i]; 481 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 482 483 /* load in initial (unfactored row) */ 484 nz = a->i[r[i]+1] - a->i[r[i]]; 485 ajtmpold = a->j + a->i[r[i]]; 486 v = a->a + a->i[r[i]]; 487 for (j=0; j<nz; j++) { 488 rtmp[ics[ajtmpold[j]]] = v[j]; 489 } 490 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 491 492 row = *ajtmp++ ; 493 while (row < i) { 494 pc = rtmp + row; 495 if (*pc != 0.0) { 496 pv = b->a + diag_offset[row]; 497 pj = b->j + diag_offset[row] + 1; 498 multiplier = *pc / *pv++; 499 *pc = multiplier; 500 nz = ai[row+1] - diag_offset[row] - 1; 501 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 502 PetscLogFlops(2*nz); 503 } 504 row = *ajtmp++; 505 } 506 /* finished row so stick it into b->a */ 507 pv = b->a + ai[i] ; 508 pj = b->j + ai[i] ; 509 nz = ai[i+1] - ai[i]; 510 diag = diag_offset[i] - ai[i]; 511 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 512 rs = 0.0; 513 for (j=0; j<nz; j++) { 514 pv[j] = rtmps[pj[j]]; 515 if (j != diag) rs += PetscAbsScalar(pv[j]); 516 } 517 #define MAX_NSHIFT 5 518 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 519 if (nshift>MAX_NSHIFT) { 520 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 521 } else if (nshift==MAX_NSHIFT) { 522 shift_fraction = shift_hi; 523 lushift = PETSC_FALSE; 524 } else { 525 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 526 lushift = PETSC_TRUE; 527 } 528 shift_amount = shift_fraction * shift_top; 529 nshift++; 530 break; 531 } 532 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 533 if (damping) { 534 if (ndamp) damping *= 2.0; 535 damp = PETSC_TRUE; 536 ndamp++; 537 break; 538 } else { 539 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 540 } 541 } 542 } 543 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 544 /* 545 * if no shift in this attempt & shifting & started shifting & can refine, 546 * then try lower shift 547 */ 548 shift_hi = shift_fraction; 549 shift_fraction = (shift_hi+shift_lo)/2.; 550 shift_amount = shift_fraction * shift_top; 551 lushift = PETSC_TRUE; 552 nshift++; 553 } 554 } while (damp || lushift); 555 556 /* invert diagonal entries for simplier triangular solves */ 557 for (i=0; i<n; i++) { 558 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 559 } 560 561 ierr = PetscFree(rtmp);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 563 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 564 C->factor = FACTOR_LU; 565 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 566 C->assembled = PETSC_TRUE; 567 PetscLogFlops(C->n); 568 if (ndamp) { 569 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 570 } 571 if (nshift) { 572 b->lu_shift_fraction = shift_fraction; 573 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift); 574 } 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 580 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 581 { 582 PetscFunctionBegin; 583 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 584 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 585 PetscFunctionReturn(0); 586 } 587 588 589 /* ----------------------------------------------------------- */ 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatLUFactor_SeqAIJ" 592 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 593 { 594 PetscErrorCode ierr; 595 Mat C; 596 597 PetscFunctionBegin; 598 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 599 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 600 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 601 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 602 PetscFunctionReturn(0); 603 } 604 /* ----------------------------------------------------------- */ 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatSolve_SeqAIJ" 607 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 608 { 609 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 610 IS iscol = a->col,isrow = a->row; 611 PetscErrorCode ierr; 612 int *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 613 int nz,*rout,*cout; 614 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 615 616 PetscFunctionBegin; 617 if (!n) PetscFunctionReturn(0); 618 619 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 620 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 621 tmp = a->solve_work; 622 623 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 624 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 625 626 /* forward solve the lower triangular */ 627 tmp[0] = b[*r++]; 628 tmps = tmp; 629 for (i=1; i<n; i++) { 630 v = aa + ai[i] ; 631 vi = aj + ai[i] ; 632 nz = a->diag[i] - ai[i]; 633 sum = b[*r++]; 634 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 635 tmp[i] = sum; 636 } 637 638 /* backward solve the upper triangular */ 639 for (i=n-1; i>=0; i--){ 640 v = aa + a->diag[i] + 1; 641 vi = aj + a->diag[i] + 1; 642 nz = ai[i+1] - a->diag[i] - 1; 643 sum = tmp[i]; 644 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 645 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 646 } 647 648 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 649 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 650 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 651 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 652 PetscLogFlops(2*a->nz - A->n); 653 PetscFunctionReturn(0); 654 } 655 656 /* ----------------------------------------------------------- */ 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 659 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 660 { 661 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 662 PetscErrorCode ierr; 663 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 664 PetscScalar *x,*b,*aa = a->a; 665 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 666 int adiag_i,i,*vi,nz,ai_i; 667 PetscScalar *v,sum; 668 #endif 669 670 PetscFunctionBegin; 671 if (!n) PetscFunctionReturn(0); 672 673 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 674 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 675 676 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 677 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 678 #else 679 /* forward solve the lower triangular */ 680 x[0] = b[0]; 681 for (i=1; i<n; i++) { 682 ai_i = ai[i]; 683 v = aa + ai_i; 684 vi = aj + ai_i; 685 nz = adiag[i] - ai_i; 686 sum = b[i]; 687 while (nz--) sum -= *v++ * x[*vi++]; 688 x[i] = sum; 689 } 690 691 /* backward solve the upper triangular */ 692 for (i=n-1; i>=0; i--){ 693 adiag_i = adiag[i]; 694 v = aa + adiag_i + 1; 695 vi = aj + adiag_i + 1; 696 nz = ai[i+1] - adiag_i - 1; 697 sum = x[i]; 698 while (nz--) sum -= *v++ * x[*vi++]; 699 x[i] = sum*aa[adiag_i]; 700 } 701 #endif 702 PetscLogFlops(2*a->nz - A->n); 703 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 704 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 710 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 711 { 712 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 713 IS iscol = a->col,isrow = a->row; 714 PetscErrorCode ierr; 715 int *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 716 int nz,*rout,*cout; 717 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 718 719 PetscFunctionBegin; 720 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 721 722 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 723 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 724 tmp = a->solve_work; 725 726 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 727 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 728 729 /* forward solve the lower triangular */ 730 tmp[0] = b[*r++]; 731 for (i=1; i<n; i++) { 732 v = aa + ai[i] ; 733 vi = aj + ai[i] ; 734 nz = a->diag[i] - ai[i]; 735 sum = b[*r++]; 736 while (nz--) sum -= *v++ * tmp[*vi++ ]; 737 tmp[i] = sum; 738 } 739 740 /* backward solve the upper triangular */ 741 for (i=n-1; i>=0; i--){ 742 v = aa + a->diag[i] + 1; 743 vi = aj + a->diag[i] + 1; 744 nz = ai[i+1] - a->diag[i] - 1; 745 sum = tmp[i]; 746 while (nz--) sum -= *v++ * tmp[*vi++ ]; 747 tmp[i] = sum*aa[a->diag[i]]; 748 x[*c--] += tmp[i]; 749 } 750 751 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 752 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 753 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 754 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 755 PetscLogFlops(2*a->nz); 756 757 PetscFunctionReturn(0); 758 } 759 /* -------------------------------------------------------------------*/ 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 762 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 763 { 764 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 765 IS iscol = a->col,isrow = a->row; 766 PetscErrorCode ierr; 767 int *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 768 int nz,*rout,*cout,*diag = a->diag; 769 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 770 771 PetscFunctionBegin; 772 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 774 tmp = a->solve_work; 775 776 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 777 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 778 779 /* copy the b into temp work space according to permutation */ 780 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 781 782 /* forward solve the U^T */ 783 for (i=0; i<n; i++) { 784 v = aa + diag[i] ; 785 vi = aj + diag[i] + 1; 786 nz = ai[i+1] - diag[i] - 1; 787 s1 = tmp[i]; 788 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 789 while (nz--) { 790 tmp[*vi++ ] -= (*v++)*s1; 791 } 792 tmp[i] = s1; 793 } 794 795 /* backward solve the L^T */ 796 for (i=n-1; i>=0; i--){ 797 v = aa + diag[i] - 1 ; 798 vi = aj + diag[i] - 1 ; 799 nz = diag[i] - ai[i]; 800 s1 = tmp[i]; 801 while (nz--) { 802 tmp[*vi-- ] -= (*v--)*s1; 803 } 804 } 805 806 /* copy tmp into x according to permutation */ 807 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 808 809 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 810 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 811 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 812 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 813 814 PetscLogFlops(2*a->nz-A->n); 815 PetscFunctionReturn(0); 816 } 817 818 #undef __FUNCT__ 819 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 820 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 821 { 822 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 823 IS iscol = a->col,isrow = a->row; 824 PetscErrorCode ierr; 825 int *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 826 int nz,*rout,*cout,*diag = a->diag; 827 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 828 829 PetscFunctionBegin; 830 if (zz != xx) VecCopy(zz,xx); 831 832 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 tmp = a->solve_work; 835 836 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 837 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 838 839 /* copy the b into temp work space according to permutation */ 840 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 841 842 /* forward solve the U^T */ 843 for (i=0; i<n; i++) { 844 v = aa + diag[i] ; 845 vi = aj + diag[i] + 1; 846 nz = ai[i+1] - diag[i] - 1; 847 tmp[i] *= *v++; 848 while (nz--) { 849 tmp[*vi++ ] -= (*v++)*tmp[i]; 850 } 851 } 852 853 /* backward solve the L^T */ 854 for (i=n-1; i>=0; i--){ 855 v = aa + diag[i] - 1 ; 856 vi = aj + diag[i] - 1 ; 857 nz = diag[i] - ai[i]; 858 while (nz--) { 859 tmp[*vi-- ] -= (*v--)*tmp[i]; 860 } 861 } 862 863 /* copy tmp into x according to permutation */ 864 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 865 866 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 867 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 868 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 869 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 870 871 PetscLogFlops(2*a->nz); 872 PetscFunctionReturn(0); 873 } 874 /* ----------------------------------------------------------------*/ 875 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 879 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 880 { 881 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 882 IS isicol; 883 PetscErrorCode ierr; 884 int *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 885 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 886 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 887 int incrlev,nnz,i,levels,diagonal_fill; 888 PetscTruth col_identity,row_identity; 889 PetscReal f; 890 891 PetscFunctionBegin; 892 f = info->fill; 893 levels = (int)info->levels; 894 diagonal_fill = (int)info->diagonal_fill; 895 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 896 897 /* special case that simply copies fill pattern */ 898 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 899 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 900 if (!levels && row_identity && col_identity) { 901 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 902 (*fact)->factor = FACTOR_LU; 903 b = (Mat_SeqAIJ*)(*fact)->data; 904 if (!b->diag) { 905 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 906 } 907 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 908 b->row = isrow; 909 b->col = iscol; 910 b->icol = isicol; 911 b->lu_damping = info->damping; 912 b->lu_zeropivot = info->zeropivot; 913 b->lu_shift = info->shift; 914 b->lu_shift_fraction= info->shift_fraction; 915 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 916 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 917 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 918 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 923 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 924 925 /* get new row pointers */ 926 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 927 ainew[0] = 0; 928 /* don't know how many column pointers are needed so estimate */ 929 jmax = (int)(f*(ai[n]+1)); 930 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 931 /* ajfill is level of fill for each fill entry */ 932 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 933 /* fill is a linked list of nonzeros in active row */ 934 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 935 /* im is level for each filled value */ 936 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 937 /* dloc is location of diagonal in factor */ 938 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 939 dloc[0] = 0; 940 for (prow=0; prow<n; prow++) { 941 942 /* copy current row into linked list */ 943 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 944 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 945 xi = aj + ai[r[prow]] ; 946 fill[n] = n; 947 fill[prow] = -1; /* marker to indicate if diagonal exists */ 948 while (nz--) { 949 fm = n; 950 idx = ic[*xi++ ]; 951 do { 952 m = fm; 953 fm = fill[m]; 954 } while (fm < idx); 955 fill[m] = idx; 956 fill[idx] = fm; 957 im[idx] = 0; 958 } 959 960 /* make sure diagonal entry is included */ 961 if (diagonal_fill && fill[prow] == -1) { 962 fm = n; 963 while (fill[fm] < prow) fm = fill[fm]; 964 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 965 fill[fm] = prow; 966 im[prow] = 0; 967 nzf++; 968 dcount++; 969 } 970 971 nzi = 0; 972 row = fill[n]; 973 while (row < prow) { 974 incrlev = im[row] + 1; 975 nz = dloc[row]; 976 xi = ajnew + ainew[row] + nz + 1; 977 flev = ajfill + ainew[row] + nz + 1; 978 nnz = ainew[row+1] - ainew[row] - nz - 1; 979 fm = row; 980 while (nnz-- > 0) { 981 idx = *xi++ ; 982 if (*flev + incrlev > levels) { 983 flev++; 984 continue; 985 } 986 do { 987 m = fm; 988 fm = fill[m]; 989 } while (fm < idx); 990 if (fm != idx) { 991 im[idx] = *flev + incrlev; 992 fill[m] = idx; 993 fill[idx] = fm; 994 fm = idx; 995 nzf++; 996 } else { 997 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 998 } 999 flev++; 1000 } 1001 row = fill[row]; 1002 nzi++; 1003 } 1004 /* copy new filled row into permanent storage */ 1005 ainew[prow+1] = ainew[prow] + nzf; 1006 if (ainew[prow+1] > jmax) { 1007 1008 /* estimate how much additional space we will need */ 1009 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1010 /* just double the memory each time */ 1011 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1012 int maxadd = jmax; 1013 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1014 jmax += maxadd; 1015 1016 /* allocate a longer ajnew and ajfill */ 1017 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1018 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1019 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1020 ajnew = xi; 1021 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1022 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1023 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1024 ajfill = xi; 1025 realloc++; /* count how many times we realloc */ 1026 } 1027 xi = ajnew + ainew[prow] ; 1028 flev = ajfill + ainew[prow] ; 1029 dloc[prow] = nzi; 1030 fm = fill[n]; 1031 while (nzf--) { 1032 *xi++ = fm ; 1033 *flev++ = im[fm]; 1034 fm = fill[fm]; 1035 } 1036 /* make sure row has diagonal entry */ 1037 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1038 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1039 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1040 } 1041 } 1042 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1043 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1044 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1045 ierr = PetscFree(fill);CHKERRQ(ierr); 1046 ierr = PetscFree(im);CHKERRQ(ierr); 1047 1048 { 1049 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1050 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1051 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1052 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1053 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1054 if (diagonal_fill) { 1055 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1056 } 1057 } 1058 1059 /* put together the new matrix */ 1060 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1061 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1062 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1063 PetscLogObjectParent(*fact,isicol); 1064 b = (Mat_SeqAIJ*)(*fact)->data; 1065 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1066 b->singlemalloc = PETSC_FALSE; 1067 len = (ainew[n] )*sizeof(PetscScalar); 1068 /* the next line frees the default space generated by the Create() */ 1069 ierr = PetscFree(b->a);CHKERRQ(ierr); 1070 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1071 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1072 b->j = ajnew; 1073 b->i = ainew; 1074 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1075 b->diag = dloc; 1076 b->ilen = 0; 1077 b->imax = 0; 1078 b->row = isrow; 1079 b->col = iscol; 1080 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1081 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1082 b->icol = isicol; 1083 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1084 /* In b structure: Free imax, ilen, old a, old j. 1085 Allocate dloc, solve_work, new a, new j */ 1086 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar))); 1087 b->maxnz = b->nz = ainew[n] ; 1088 b->lu_damping = info->damping; 1089 b->lu_shift = info->shift; 1090 b->lu_shift_fraction = info->shift_fraction; 1091 b->lu_zeropivot = info->zeropivot; 1092 (*fact)->factor = FACTOR_LU; 1093 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1094 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1095 1096 (*fact)->info.factor_mallocs = realloc; 1097 (*fact)->info.fill_ratio_given = f; 1098 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #include "src/mat/impls/sbaij/seq/sbaij.h" 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1105 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1106 { 1107 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 if (!a->sbaijMat){ 1112 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1113 } 1114 1115 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1116 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1117 a->sbaijMat = PETSC_NULL; 1118 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1124 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1125 { 1126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 PetscTruth perm_identity; 1129 1130 PetscFunctionBegin; 1131 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1132 if (!perm_identity){ 1133 SETERRQ(1,"Non-identity permutation is not supported yet"); 1134 } 1135 if (!a->sbaijMat){ 1136 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1137 } 1138 1139 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1140 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1141 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1147 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1148 { 1149 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscTruth perm_identity; 1152 1153 PetscFunctionBegin; 1154 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1155 if (!perm_identity){ 1156 SETERRQ(1,"Non-identity permutation is not supported yet"); 1157 } 1158 if (!a->sbaijMat){ 1159 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1160 } 1161 1162 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1163 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1164 1165 PetscFunctionReturn(0); 1166 } 1167