1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/seq/aij.h" 4 #include "../src/inline/dot.h" 5 #include "../src/inline/spops.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ; 252 else (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 253 (*fact)->ops->solveadd = MatSolveAdd_SeqAIJ; 254 (*fact)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 255 (*fact)->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 256 257 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 EXTERN_C_BEGIN 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 267 { 268 PetscFunctionBegin; 269 *flg = PETSC_TRUE; 270 PetscFunctionReturn(0); 271 } 272 EXTERN_C_END 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 278 { 279 PetscInt n = A->rmap->n; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 284 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 285 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 286 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 287 if (ftype == MAT_FACTOR_ILU) { 288 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 289 } else { 290 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 291 } 292 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 293 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 294 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 295 if (ftype == MAT_FACTOR_ICC) { 296 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 297 } else { 298 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 299 } 300 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 301 (*B)->factor = ftype; 302 PetscFunctionReturn(0); 303 } 304 EXTERN_C_END 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 308 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 309 { 310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 311 IS isicol; 312 PetscErrorCode ierr; 313 const PetscInt *r,*ic; 314 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 315 PetscInt *bi,*bj,*ajtmp; 316 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 317 PetscReal f; 318 PetscInt nlnk,*lnk,k,**bi_ptr; 319 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 320 PetscBT lnkbt; 321 322 PetscFunctionBegin; 323 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 324 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 325 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 326 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 327 328 /* get new row pointers */ 329 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 330 bi[0] = 0; 331 332 /* bdiag is location of diagonal in factor */ 333 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 334 bdiag[0] = 0; 335 336 /* linked list for storing column indices of the active row */ 337 nlnk = n + 1; 338 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 339 340 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 341 342 /* initial FreeSpace size is f*(ai[n]+1) */ 343 f = info->fill; 344 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 345 current_space = free_space; 346 347 for (i=0; i<n; i++) { 348 /* copy previous fill into linked list */ 349 nzi = 0; 350 nnz = ai[r[i]+1] - ai[r[i]]; 351 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 352 ajtmp = aj + ai[r[i]]; 353 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 354 nzi += nlnk; 355 356 /* add pivot rows into linked list */ 357 row = lnk[n]; 358 while (row < i) { 359 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 360 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 361 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 362 nzi += nlnk; 363 row = lnk[row]; 364 } 365 bi[i+1] = bi[i] + nzi; 366 im[i] = nzi; 367 368 /* mark bdiag */ 369 nzbd = 0; 370 nnz = nzi; 371 k = lnk[n]; 372 while (nnz-- && k < i){ 373 nzbd++; 374 k = lnk[k]; 375 } 376 bdiag[i] = bi[i] + nzbd; 377 378 /* if free space is not available, make more free space */ 379 if (current_space->local_remaining<nzi) { 380 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 381 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 382 reallocs++; 383 } 384 385 /* copy data into free space, then initialize lnk */ 386 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 387 bi_ptr[i] = current_space->array; 388 current_space->array += nzi; 389 current_space->local_used += nzi; 390 current_space->local_remaining -= nzi; 391 } 392 #if defined(PETSC_USE_INFO) 393 if (ai[n] != 0) { 394 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 395 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 396 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 397 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 398 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 399 } else { 400 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 401 } 402 #endif 403 404 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 405 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 406 407 /* destroy list of free space and other temporary array(s) */ 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 409 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 410 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 411 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 412 413 /* put together the new matrix */ 414 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 415 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 416 b = (Mat_SeqAIJ*)(B)->data; 417 b->free_a = PETSC_TRUE; 418 b->free_ij = PETSC_TRUE; 419 b->singlemalloc = PETSC_FALSE; 420 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 421 b->j = bj; 422 b->i = bi; 423 b->diag = bdiag; 424 b->ilen = 0; 425 b->imax = 0; 426 b->row = isrow; 427 b->col = iscol; 428 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 429 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 430 b->icol = isicol; 431 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 432 433 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 434 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 435 b->maxnz = b->nz = bi[n] ; 436 437 (B)->factor = MAT_FACTOR_LU; 438 (B)->info.factor_mallocs = reallocs; 439 (B)->info.fill_ratio_given = f; 440 441 if (ai[n] != 0) { 442 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 443 } else { 444 (B)->info.fill_ratio_needed = 0.0; 445 } 446 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 447 (B)->ops->solve = MatSolve_SeqAIJ; 448 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 449 /* switch to inodes if appropriate */ 450 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 /* 455 Trouble in factorization, should we dump the original matrix? 456 */ 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatFactorDumpMatrix" 459 PetscErrorCode MatFactorDumpMatrix(Mat A) 460 { 461 PetscErrorCode ierr; 462 PetscTruth flg; 463 464 PetscFunctionBegin; 465 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 466 if (flg) { 467 PetscViewer viewer; 468 char filename[PETSC_MAX_PATH_LEN]; 469 470 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 471 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 472 ierr = MatView(A,viewer);CHKERRQ(ierr); 473 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 479 480 /* ----------------------------------------------------------- */ 481 #undef __FUNCT__ 482 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 483 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 484 { 485 Mat C=B; 486 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 487 IS isrow = b->row,isicol = b->icol; 488 PetscErrorCode ierr; 489 const PetscInt *r,*ic,*ics; 490 PetscInt i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 491 PetscInt *ajtmp,*bjtmp,nz,row; 492 PetscInt *diag_offset = b->diag,diag,*pj; 493 PetscScalar *rtmp,*pc,multiplier,*rtmps; 494 MatScalar *v,*pv; 495 PetscScalar d; 496 PetscReal rs; 497 LUShift_Ctx sctx; 498 PetscInt newshift,*ddiag; 499 500 PetscFunctionBegin; 501 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 502 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 503 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 504 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 505 rtmps = rtmp; ics = ic; 506 507 sctx.shift_top = 0; 508 sctx.nshift_max = 0; 509 sctx.shift_lo = 0; 510 sctx.shift_hi = 0; 511 sctx.shift_fraction = 0; 512 513 /* if both shift schemes are chosen by user, only use info->shiftpd */ 514 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 515 PetscInt *aai = a->i; 516 ddiag = a->diag; 517 sctx.shift_top = 0; 518 for (i=0; i<n; i++) { 519 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 520 d = (a->a)[ddiag[i]]; 521 rs = -PetscAbsScalar(d) - PetscRealPart(d); 522 v = a->a+aai[i]; 523 nz = aai[i+1] - aai[i]; 524 for (j=0; j<nz; j++) 525 rs += PetscAbsScalar(v[j]); 526 if (rs>sctx.shift_top) sctx.shift_top = rs; 527 } 528 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 529 sctx.shift_top *= 1.1; 530 sctx.nshift_max = 5; 531 sctx.shift_lo = 0.; 532 sctx.shift_hi = 1.; 533 } 534 535 sctx.shift_amount = 0; 536 sctx.nshift = 0; 537 do { 538 sctx.lushift = PETSC_FALSE; 539 for (i=0; i<n; i++){ 540 nz = bi[i+1] - bi[i]; 541 bjtmp = bj + bi[i]; 542 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 543 544 /* load in initial (unfactored row) */ 545 nz = a->i[r[i]+1] - a->i[r[i]]; 546 ajtmp = a->j + a->i[r[i]]; 547 v = a->a + a->i[r[i]]; 548 for (j=0; j<nz; j++) { 549 rtmp[ics[ajtmp[j]]] = v[j]; 550 } 551 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 552 553 row = *bjtmp++; 554 while (row < i) { 555 pc = rtmp + row; 556 if (*pc != 0.0) { 557 pv = b->a + diag_offset[row]; 558 pj = b->j + diag_offset[row] + 1; 559 multiplier = *pc / *pv++; 560 *pc = multiplier; 561 nz = bi[row+1] - diag_offset[row] - 1; 562 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 563 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 564 } 565 row = *bjtmp++; 566 } 567 /* finished row so stick it into b->a */ 568 pv = b->a + bi[i] ; 569 pj = b->j + bi[i] ; 570 nz = bi[i+1] - bi[i]; 571 diag = diag_offset[i] - bi[i]; 572 rs = 0.0; 573 for (j=0; j<nz; j++) { 574 pv[j] = rtmps[pj[j]]; 575 if (j != diag) rs += PetscAbsScalar(pv[j]); 576 } 577 578 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 579 sctx.rs = rs; 580 sctx.pv = pv[diag]; 581 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 582 if (newshift == 1) break; 583 } 584 585 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 586 /* 587 * if no shift in this attempt & shifting & started shifting & can refine, 588 * then try lower shift 589 */ 590 sctx.shift_hi = sctx.shift_fraction; 591 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 592 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 593 sctx.lushift = PETSC_TRUE; 594 sctx.nshift++; 595 } 596 } while (sctx.lushift); 597 598 /* invert diagonal entries for simplier triangular solves */ 599 for (i=0; i<n; i++) { 600 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 601 } 602 ierr = PetscFree(rtmp);CHKERRQ(ierr); 603 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 604 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 605 if (b->inode.use) { 606 C->ops->solve = MatSolve_Inode; 607 } else { 608 PetscTruth row_identity, col_identity; 609 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 610 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 611 if (row_identity && col_identity) { 612 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 613 } else { 614 C->ops->solve = MatSolve_SeqAIJ; 615 } 616 } 617 C->ops->solveadd = MatSolveAdd_SeqAIJ; 618 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 619 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 620 C->ops->matsolve = MatMatSolve_SeqAIJ; 621 C->assembled = PETSC_TRUE; 622 C->preallocated = PETSC_TRUE; 623 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 624 if (sctx.nshift){ 625 if (info->shiftpd) { 626 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 627 } else if (info->shiftnz) { 628 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 /* 635 This routine implements inplace ILU(0) with row or/and column permutations. 636 Input: 637 A - original matrix 638 Output; 639 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 640 a->j (col index) is permuted by the inverse of colperm, then sorted 641 a->a reordered accordingly with a->j 642 a->diag (ptr to diagonal elements) is updated. 643 */ 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 646 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 647 { 648 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 649 IS isrow = a->row,isicol = a->icol; 650 PetscErrorCode ierr; 651 const PetscInt *r,*ic,*ics; 652 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 653 PetscInt *ajtmp,nz,row; 654 PetscInt *diag = a->diag,nbdiag,*pj; 655 PetscScalar *rtmp,*pc,multiplier,d; 656 MatScalar *v,*pv; 657 PetscReal rs; 658 LUShift_Ctx sctx; 659 PetscInt newshift; 660 661 PetscFunctionBegin; 662 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 663 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 664 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 665 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 666 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 667 ics = ic; 668 669 sctx.shift_top = 0; 670 sctx.nshift_max = 0; 671 sctx.shift_lo = 0; 672 sctx.shift_hi = 0; 673 sctx.shift_fraction = 0; 674 675 /* if both shift schemes are chosen by user, only use info->shiftpd */ 676 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 677 sctx.shift_top = 0; 678 for (i=0; i<n; i++) { 679 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 680 d = (a->a)[diag[i]]; 681 rs = -PetscAbsScalar(d) - PetscRealPart(d); 682 v = a->a+ai[i]; 683 nz = ai[i+1] - ai[i]; 684 for (j=0; j<nz; j++) 685 rs += PetscAbsScalar(v[j]); 686 if (rs>sctx.shift_top) sctx.shift_top = rs; 687 } 688 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 689 sctx.shift_top *= 1.1; 690 sctx.nshift_max = 5; 691 sctx.shift_lo = 0.; 692 sctx.shift_hi = 1.; 693 } 694 695 sctx.shift_amount = 0; 696 sctx.nshift = 0; 697 do { 698 sctx.lushift = PETSC_FALSE; 699 for (i=0; i<n; i++){ 700 /* load in initial unfactored row */ 701 nz = ai[r[i]+1] - ai[r[i]]; 702 ajtmp = aj + ai[r[i]]; 703 v = a->a + ai[r[i]]; 704 /* sort permuted ajtmp and values v accordingly */ 705 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 706 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 707 708 diag[r[i]] = ai[r[i]]; 709 for (j=0; j<nz; j++) { 710 rtmp[ajtmp[j]] = v[j]; 711 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 712 } 713 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 714 715 row = *ajtmp++; 716 while (row < i) { 717 pc = rtmp + row; 718 if (*pc != 0.0) { 719 pv = a->a + diag[r[row]]; 720 pj = aj + diag[r[row]] + 1; 721 722 multiplier = *pc / *pv++; 723 *pc = multiplier; 724 nz = ai[r[row]+1] - diag[r[row]] - 1; 725 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 726 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 727 } 728 row = *ajtmp++; 729 } 730 /* finished row so overwrite it onto a->a */ 731 pv = a->a + ai[r[i]] ; 732 pj = aj + ai[r[i]] ; 733 nz = ai[r[i]+1] - ai[r[i]]; 734 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 735 736 rs = 0.0; 737 for (j=0; j<nz; j++) { 738 pv[j] = rtmp[pj[j]]; 739 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 740 } 741 742 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 743 sctx.rs = rs; 744 sctx.pv = pv[nbdiag]; 745 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 746 if (newshift == 1) break; 747 } 748 749 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 750 /* 751 * if no shift in this attempt & shifting & started shifting & can refine, 752 * then try lower shift 753 */ 754 sctx.shift_hi = sctx.shift_fraction; 755 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 756 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 757 sctx.lushift = PETSC_TRUE; 758 sctx.nshift++; 759 } 760 } while (sctx.lushift); 761 762 /* invert diagonal entries for simplier triangular solves */ 763 for (i=0; i<n; i++) { 764 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 765 } 766 767 ierr = PetscFree(rtmp);CHKERRQ(ierr); 768 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 769 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 770 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 771 A->ops->solveadd = MatSolveAdd_SeqAIJ; 772 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 773 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 774 A->assembled = PETSC_TRUE; 775 A->preallocated = PETSC_TRUE; 776 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 777 if (sctx.nshift){ 778 if (info->shiftpd) { 779 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 780 } else if (info->shiftnz) { 781 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 782 } 783 } 784 PetscFunctionReturn(0); 785 } 786 787 /* ----------------------------------------------------------- */ 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatLUFactor_SeqAIJ" 790 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 791 { 792 PetscErrorCode ierr; 793 Mat C; 794 795 PetscFunctionBegin; 796 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 797 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 798 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 799 A->ops->solve = C->ops->solve; 800 A->ops->solvetranspose = C->ops->solvetranspose; 801 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 802 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 803 PetscFunctionReturn(0); 804 } 805 /* ----------------------------------------------------------- */ 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatSolve_SeqAIJ" 808 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 809 { 810 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 811 IS iscol = a->col,isrow = a->row; 812 PetscErrorCode ierr; 813 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 814 PetscInt nz; 815 const PetscInt *rout,*cout,*r,*c; 816 PetscScalar *x,*tmp,*tmps,sum; 817 const PetscScalar *b; 818 const MatScalar *aa = a->a,*v; 819 820 PetscFunctionBegin; 821 if (!n) PetscFunctionReturn(0); 822 823 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 824 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 825 tmp = a->solve_work; 826 827 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 828 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 829 830 /* forward solve the lower triangular */ 831 tmp[0] = b[*r++]; 832 tmps = tmp; 833 for (i=1; i<n; i++) { 834 v = aa + ai[i] ; 835 vi = aj + ai[i] ; 836 nz = a->diag[i] - ai[i]; 837 sum = b[*r++]; 838 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 839 tmp[i] = sum; 840 } 841 842 /* backward solve the upper triangular */ 843 for (i=n-1; i>=0; i--){ 844 v = aa + a->diag[i] + 1; 845 vi = aj + a->diag[i] + 1; 846 nz = ai[i+1] - a->diag[i] - 1; 847 sum = tmp[i]; 848 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 849 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 850 } 851 852 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 853 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 854 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 855 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 856 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatMatSolve_SeqAIJ" 862 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 863 { 864 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 865 IS iscol = a->col,isrow = a->row; 866 PetscErrorCode ierr; 867 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 868 PetscInt nz,neq; 869 const PetscInt *rout,*cout,*r,*c; 870 PetscScalar *x,*b,*tmp,*tmps,sum; 871 const MatScalar *aa = a->a,*v; 872 PetscTruth bisdense,xisdense; 873 874 PetscFunctionBegin; 875 if (!n) PetscFunctionReturn(0); 876 877 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 878 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 879 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 880 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 881 882 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 883 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 884 885 tmp = a->solve_work; 886 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 887 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 888 889 for (neq=0; neq<B->cmap->n; neq++){ 890 /* forward solve the lower triangular */ 891 tmp[0] = b[r[0]]; 892 tmps = tmp; 893 for (i=1; i<n; i++) { 894 v = aa + ai[i] ; 895 vi = aj + ai[i] ; 896 nz = a->diag[i] - ai[i]; 897 sum = b[r[i]]; 898 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 899 tmp[i] = sum; 900 } 901 /* backward solve the upper triangular */ 902 for (i=n-1; i>=0; i--){ 903 v = aa + a->diag[i] + 1; 904 vi = aj + a->diag[i] + 1; 905 nz = ai[i+1] - a->diag[i] - 1; 906 sum = tmp[i]; 907 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 908 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 909 } 910 911 b += n; 912 x += n; 913 } 914 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 915 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 916 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 917 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 918 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 924 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 925 { 926 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 927 IS iscol = a->col,isrow = a->row; 928 PetscErrorCode ierr; 929 const PetscInt *r,*c,*rout,*cout; 930 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 931 PetscInt nz,row; 932 PetscScalar *x,*b,*tmp,*tmps,sum; 933 const MatScalar *aa = a->a,*v; 934 935 PetscFunctionBegin; 936 if (!n) PetscFunctionReturn(0); 937 938 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 939 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 940 tmp = a->solve_work; 941 942 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 943 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 944 945 /* forward solve the lower triangular */ 946 tmp[0] = b[*r++]; 947 tmps = tmp; 948 for (row=1; row<n; row++) { 949 i = rout[row]; /* permuted row */ 950 v = aa + ai[i] ; 951 vi = aj + ai[i] ; 952 nz = a->diag[i] - ai[i]; 953 sum = b[*r++]; 954 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 955 tmp[row] = sum; 956 } 957 958 /* backward solve the upper triangular */ 959 for (row=n-1; row>=0; row--){ 960 i = rout[row]; /* permuted row */ 961 v = aa + a->diag[i] + 1; 962 vi = aj + a->diag[i] + 1; 963 nz = ai[i+1] - a->diag[i] - 1; 964 sum = tmp[row]; 965 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 966 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 967 } 968 969 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 970 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 971 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 972 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 973 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 /* ----------------------------------------------------------- */ 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 980 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 981 { 982 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 983 PetscErrorCode ierr; 984 PetscInt n = A->rmap->n; 985 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 986 PetscScalar *x; 987 const PetscScalar *b; 988 const MatScalar *aa = a->a; 989 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 990 PetscInt adiag_i,i,nz,ai_i; 991 const MatScalar *v; 992 PetscScalar sum; 993 #endif 994 995 PetscFunctionBegin; 996 if (!n) PetscFunctionReturn(0); 997 998 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 999 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1000 1001 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1002 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1003 #else 1004 /* forward solve the lower triangular */ 1005 x[0] = b[0]; 1006 for (i=1; i<n; i++) { 1007 ai_i = ai[i]; 1008 v = aa + ai_i; 1009 vi = aj + ai_i; 1010 nz = adiag[i] - ai_i; 1011 sum = b[i]; 1012 while (nz--) sum -= *v++ * x[*vi++]; 1013 x[i] = sum; 1014 } 1015 1016 /* backward solve the upper triangular */ 1017 for (i=n-1; i>=0; i--){ 1018 adiag_i = adiag[i]; 1019 v = aa + adiag_i + 1; 1020 vi = aj + adiag_i + 1; 1021 nz = ai[i+1] - adiag_i - 1; 1022 sum = x[i]; 1023 while (nz--) sum -= *v++ * x[*vi++]; 1024 x[i] = sum*aa[adiag_i]; 1025 } 1026 #endif 1027 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1028 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1029 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1035 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1036 { 1037 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1038 IS iscol = a->col,isrow = a->row; 1039 PetscErrorCode ierr; 1040 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1041 PetscInt nz; 1042 const PetscInt *rout,*cout,*r,*c; 1043 PetscScalar *x,*b,*tmp,sum; 1044 const MatScalar *aa = a->a,*v; 1045 1046 PetscFunctionBegin; 1047 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1048 1049 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1050 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1051 tmp = a->solve_work; 1052 1053 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1054 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1055 1056 /* forward solve the lower triangular */ 1057 tmp[0] = b[*r++]; 1058 for (i=1; i<n; i++) { 1059 v = aa + ai[i] ; 1060 vi = aj + ai[i] ; 1061 nz = a->diag[i] - ai[i]; 1062 sum = b[*r++]; 1063 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1064 tmp[i] = sum; 1065 } 1066 1067 /* backward solve the upper triangular */ 1068 for (i=n-1; i>=0; i--){ 1069 v = aa + a->diag[i] + 1; 1070 vi = aj + a->diag[i] + 1; 1071 nz = ai[i+1] - a->diag[i] - 1; 1072 sum = tmp[i]; 1073 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1074 tmp[i] = sum*aa[a->diag[i]]; 1075 x[*c--] += tmp[i]; 1076 } 1077 1078 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1079 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1080 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1081 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1082 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1083 1084 PetscFunctionReturn(0); 1085 } 1086 /* -------------------------------------------------------------------*/ 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1089 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1090 { 1091 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1092 IS iscol = a->col,isrow = a->row; 1093 PetscErrorCode ierr; 1094 const PetscInt *rout,*cout,*r,*c; 1095 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1096 PetscInt nz,*diag = a->diag; 1097 PetscScalar *x,*b,*tmp,s1; 1098 const MatScalar *aa = a->a,*v; 1099 1100 PetscFunctionBegin; 1101 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1102 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1103 tmp = a->solve_work; 1104 1105 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1106 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1107 1108 /* copy the b into temp work space according to permutation */ 1109 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1110 1111 /* forward solve the U^T */ 1112 for (i=0; i<n; i++) { 1113 v = aa + diag[i] ; 1114 vi = aj + diag[i] + 1; 1115 nz = ai[i+1] - diag[i] - 1; 1116 s1 = tmp[i]; 1117 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1118 while (nz--) { 1119 tmp[*vi++ ] -= (*v++)*s1; 1120 } 1121 tmp[i] = s1; 1122 } 1123 1124 /* backward solve the L^T */ 1125 for (i=n-1; i>=0; i--){ 1126 v = aa + diag[i] - 1 ; 1127 vi = aj + diag[i] - 1 ; 1128 nz = diag[i] - ai[i]; 1129 s1 = tmp[i]; 1130 while (nz--) { 1131 tmp[*vi-- ] -= (*v--)*s1; 1132 } 1133 } 1134 1135 /* copy tmp into x according to permutation */ 1136 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1137 1138 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1139 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1140 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1141 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1142 1143 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1149 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1150 { 1151 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1152 IS iscol = a->col,isrow = a->row; 1153 PetscErrorCode ierr; 1154 const PetscInt *r,*c,*rout,*cout; 1155 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1156 PetscInt nz,*diag = a->diag; 1157 PetscScalar *x,*b,*tmp; 1158 const MatScalar *aa = a->a,*v; 1159 1160 PetscFunctionBegin; 1161 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1162 1163 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1165 tmp = a->solve_work; 1166 1167 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1168 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1169 1170 /* copy the b into temp work space according to permutation */ 1171 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1172 1173 /* forward solve the U^T */ 1174 for (i=0; i<n; i++) { 1175 v = aa + diag[i] ; 1176 vi = aj + diag[i] + 1; 1177 nz = ai[i+1] - diag[i] - 1; 1178 tmp[i] *= *v++; 1179 while (nz--) { 1180 tmp[*vi++ ] -= (*v++)*tmp[i]; 1181 } 1182 } 1183 1184 /* backward solve the L^T */ 1185 for (i=n-1; i>=0; i--){ 1186 v = aa + diag[i] - 1 ; 1187 vi = aj + diag[i] - 1 ; 1188 nz = diag[i] - ai[i]; 1189 while (nz--) { 1190 tmp[*vi-- ] -= (*v--)*tmp[i]; 1191 } 1192 } 1193 1194 /* copy tmp into x according to permutation */ 1195 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1196 1197 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1198 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1201 1202 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1203 PetscFunctionReturn(0); 1204 } 1205 /* ----------------------------------------------------------------*/ 1206 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1207 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1211 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1212 { 1213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1214 IS isicol; 1215 PetscErrorCode ierr; 1216 const PetscInt *r,*ic; 1217 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1218 PetscInt *bi,*cols,nnz,*cols_lvl; 1219 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1220 PetscInt i,levels,diagonal_fill; 1221 PetscTruth col_identity,row_identity; 1222 PetscReal f; 1223 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1224 PetscBT lnkbt; 1225 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1226 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1227 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1228 PetscTruth missing; 1229 1230 PetscFunctionBegin; 1231 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1232 f = info->fill; 1233 levels = (PetscInt)info->levels; 1234 diagonal_fill = (PetscInt)info->diagonal_fill; 1235 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1236 1237 /* special case that simply copies fill pattern */ 1238 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1239 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1240 if (!levels && row_identity && col_identity) { 1241 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1242 fact->factor = MAT_FACTOR_ILU; 1243 (fact)->info.factor_mallocs = 0; 1244 (fact)->info.fill_ratio_given = info->fill; 1245 (fact)->info.fill_ratio_needed = 1.0; 1246 b = (Mat_SeqAIJ*)(fact)->data; 1247 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1248 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1249 b->row = isrow; 1250 b->col = iscol; 1251 b->icol = isicol; 1252 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1253 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1254 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1255 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1256 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1261 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1262 1263 /* get new row pointers */ 1264 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1265 bi[0] = 0; 1266 /* bdiag is location of diagonal in factor */ 1267 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1268 bdiag[0] = 0; 1269 1270 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1271 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1272 1273 /* create a linked list for storing column indices of the active row */ 1274 nlnk = n + 1; 1275 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1276 1277 /* initial FreeSpace size is f*(ai[n]+1) */ 1278 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1279 current_space = free_space; 1280 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1281 current_space_lvl = free_space_lvl; 1282 1283 for (i=0; i<n; i++) { 1284 nzi = 0; 1285 /* copy current row into linked list */ 1286 nnz = ai[r[i]+1] - ai[r[i]]; 1287 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1288 cols = aj + ai[r[i]]; 1289 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1290 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1291 nzi += nlnk; 1292 1293 /* make sure diagonal entry is included */ 1294 if (diagonal_fill && lnk[i] == -1) { 1295 fm = n; 1296 while (lnk[fm] < i) fm = lnk[fm]; 1297 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1298 lnk[fm] = i; 1299 lnk_lvl[i] = 0; 1300 nzi++; dcount++; 1301 } 1302 1303 /* add pivot rows into the active row */ 1304 nzbd = 0; 1305 prow = lnk[n]; 1306 while (prow < i) { 1307 nnz = bdiag[prow]; 1308 cols = bj_ptr[prow] + nnz + 1; 1309 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1310 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1311 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1312 nzi += nlnk; 1313 prow = lnk[prow]; 1314 nzbd++; 1315 } 1316 bdiag[i] = nzbd; 1317 bi[i+1] = bi[i] + nzi; 1318 1319 /* if free space is not available, make more free space */ 1320 if (current_space->local_remaining<nzi) { 1321 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1322 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1323 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1324 reallocs++; 1325 } 1326 1327 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1328 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1329 bj_ptr[i] = current_space->array; 1330 bjlvl_ptr[i] = current_space_lvl->array; 1331 1332 /* make sure the active row i has diagonal entry */ 1333 if (*(bj_ptr[i]+bdiag[i]) != i) { 1334 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1335 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1336 } 1337 1338 current_space->array += nzi; 1339 current_space->local_used += nzi; 1340 current_space->local_remaining -= nzi; 1341 current_space_lvl->array += nzi; 1342 current_space_lvl->local_used += nzi; 1343 current_space_lvl->local_remaining -= nzi; 1344 } 1345 1346 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1348 1349 /* destroy list of free space and other temporary arrays */ 1350 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1351 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1352 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1353 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1354 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1355 1356 #if defined(PETSC_USE_INFO) 1357 { 1358 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1359 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1360 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1361 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1362 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1363 if (diagonal_fill) { 1364 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1365 } 1366 } 1367 #endif 1368 1369 /* put together the new matrix */ 1370 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1371 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1372 b = (Mat_SeqAIJ*)(fact)->data; 1373 b->free_a = PETSC_TRUE; 1374 b->free_ij = PETSC_TRUE; 1375 b->singlemalloc = PETSC_FALSE; 1376 len = (bi[n] )*sizeof(PetscScalar); 1377 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1378 b->j = bj; 1379 b->i = bi; 1380 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1381 b->diag = bdiag; 1382 b->ilen = 0; 1383 b->imax = 0; 1384 b->row = isrow; 1385 b->col = iscol; 1386 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1387 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1388 b->icol = isicol; 1389 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1390 /* In b structure: Free imax, ilen, old a, old j. 1391 Allocate bdiag, solve_work, new a, new j */ 1392 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1393 b->maxnz = b->nz = bi[n] ; 1394 (fact)->info.factor_mallocs = reallocs; 1395 (fact)->info.fill_ratio_given = f; 1396 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1397 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1398 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1405 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1406 { 1407 Mat C = B; 1408 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1409 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1410 IS ip=b->row,iip = b->icol; 1411 PetscErrorCode ierr; 1412 const PetscInt *rip,*riip; 1413 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1414 PetscInt *ai=a->i,*aj=a->j; 1415 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1416 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1417 PetscReal zeropivot,rs,shiftnz; 1418 PetscReal shiftpd; 1419 ChShift_Ctx sctx; 1420 PetscInt newshift; 1421 PetscTruth perm_identity; 1422 1423 PetscFunctionBegin; 1424 1425 shiftnz = info->shiftnz; 1426 shiftpd = info->shiftpd; 1427 zeropivot = info->zeropivot; 1428 1429 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1430 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1431 1432 /* initialization */ 1433 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1434 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1435 jl = il + mbs; 1436 rtmp = (MatScalar*)(jl + mbs); 1437 1438 sctx.shift_amount = 0; 1439 sctx.nshift = 0; 1440 do { 1441 sctx.chshift = PETSC_FALSE; 1442 for (i=0; i<mbs; i++) { 1443 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1444 } 1445 1446 for (k = 0; k<mbs; k++){ 1447 bval = ba + bi[k]; 1448 /* initialize k-th row by the perm[k]-th row of A */ 1449 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1450 for (j = jmin; j < jmax; j++){ 1451 col = riip[aj[j]]; 1452 if (col >= k){ /* only take upper triangular entry */ 1453 rtmp[col] = aa[j]; 1454 *bval++ = 0.0; /* for in-place factorization */ 1455 } 1456 } 1457 /* shift the diagonal of the matrix */ 1458 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1459 1460 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1461 dk = rtmp[k]; 1462 i = jl[k]; /* first row to be added to k_th row */ 1463 1464 while (i < k){ 1465 nexti = jl[i]; /* next row to be added to k_th row */ 1466 1467 /* compute multiplier, update diag(k) and U(i,k) */ 1468 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1469 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1470 dk += uikdi*ba[ili]; 1471 ba[ili] = uikdi; /* -U(i,k) */ 1472 1473 /* add multiple of row i to k-th row */ 1474 jmin = ili + 1; jmax = bi[i+1]; 1475 if (jmin < jmax){ 1476 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1477 /* update il and jl for row i */ 1478 il[i] = jmin; 1479 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1480 } 1481 i = nexti; 1482 } 1483 1484 /* shift the diagonals when zero pivot is detected */ 1485 /* compute rs=sum of abs(off-diagonal) */ 1486 rs = 0.0; 1487 jmin = bi[k]+1; 1488 nz = bi[k+1] - jmin; 1489 bcol = bj + jmin; 1490 while (nz--){ 1491 rs += PetscAbsScalar(rtmp[*bcol]); 1492 bcol++; 1493 } 1494 1495 sctx.rs = rs; 1496 sctx.pv = dk; 1497 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1498 1499 if (newshift == 1) { 1500 if (!sctx.shift_amount) { 1501 sctx.shift_amount = 1e-5; 1502 } 1503 break; 1504 } 1505 1506 /* copy data into U(k,:) */ 1507 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1508 jmin = bi[k]+1; jmax = bi[k+1]; 1509 if (jmin < jmax) { 1510 for (j=jmin; j<jmax; j++){ 1511 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1512 } 1513 /* add the k-th row into il and jl */ 1514 il[k] = jmin; 1515 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1516 } 1517 } 1518 } while (sctx.chshift); 1519 ierr = PetscFree(il);CHKERRQ(ierr); 1520 1521 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1522 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1523 1524 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1525 if (perm_identity){ 1526 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1527 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1528 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1529 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1530 } else { 1531 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1532 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1533 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1534 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1535 } 1536 1537 C->assembled = PETSC_TRUE; 1538 C->preallocated = PETSC_TRUE; 1539 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1540 if (sctx.nshift){ 1541 if (shiftnz) { 1542 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1543 } else if (shiftpd) { 1544 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1545 } 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1552 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1553 { 1554 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1555 Mat_SeqSBAIJ *b; 1556 PetscErrorCode ierr; 1557 PetscTruth perm_identity,missing; 1558 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1559 const PetscInt *rip,*riip; 1560 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1561 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1562 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1563 PetscReal fill=info->fill,levels=info->levels; 1564 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1565 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1566 PetscBT lnkbt; 1567 IS iperm; 1568 1569 PetscFunctionBegin; 1570 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1571 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1572 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1573 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1574 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1575 1576 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1577 ui[0] = 0; 1578 1579 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1580 if (!levels && perm_identity) { 1581 1582 for (i=0; i<am; i++) { 1583 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1584 } 1585 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1586 cols = uj; 1587 for (i=0; i<am; i++) { 1588 aj = a->j + a->diag[i]; 1589 ncols = ui[i+1] - ui[i]; 1590 for (j=0; j<ncols; j++) *cols++ = *aj++; 1591 } 1592 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1593 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1594 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1595 1596 /* initialization */ 1597 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1598 1599 /* jl: linked list for storing indices of the pivot rows 1600 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1601 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1602 il = jl + am; 1603 uj_ptr = (PetscInt**)(il + am); 1604 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1605 for (i=0; i<am; i++){ 1606 jl[i] = am; il[i] = 0; 1607 } 1608 1609 /* create and initialize a linked list for storing column indices of the active row k */ 1610 nlnk = am + 1; 1611 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1612 1613 /* initial FreeSpace size is fill*(ai[am]+1) */ 1614 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1615 current_space = free_space; 1616 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1617 current_space_lvl = free_space_lvl; 1618 1619 for (k=0; k<am; k++){ /* for each active row k */ 1620 /* initialize lnk by the column indices of row rip[k] of A */ 1621 nzk = 0; 1622 ncols = ai[rip[k]+1] - ai[rip[k]]; 1623 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1624 ncols_upper = 0; 1625 for (j=0; j<ncols; j++){ 1626 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1627 if (riip[i] >= k){ /* only take upper triangular entry */ 1628 ajtmp[ncols_upper] = i; 1629 ncols_upper++; 1630 } 1631 } 1632 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1633 nzk += nlnk; 1634 1635 /* update lnk by computing fill-in for each pivot row to be merged in */ 1636 prow = jl[k]; /* 1st pivot row */ 1637 1638 while (prow < k){ 1639 nextprow = jl[prow]; 1640 1641 /* merge prow into k-th row */ 1642 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1643 jmax = ui[prow+1]; 1644 ncols = jmax-jmin; 1645 i = jmin - ui[prow]; 1646 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1647 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1648 j = *(uj - 1); 1649 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1650 nzk += nlnk; 1651 1652 /* update il and jl for prow */ 1653 if (jmin < jmax){ 1654 il[prow] = jmin; 1655 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1656 } 1657 prow = nextprow; 1658 } 1659 1660 /* if free space is not available, make more free space */ 1661 if (current_space->local_remaining<nzk) { 1662 i = am - k + 1; /* num of unfactored rows */ 1663 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1664 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1665 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1666 reallocs++; 1667 } 1668 1669 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1670 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1671 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1672 1673 /* add the k-th row into il and jl */ 1674 if (nzk > 1){ 1675 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1676 jl[k] = jl[i]; jl[i] = k; 1677 il[k] = ui[k] + 1; 1678 } 1679 uj_ptr[k] = current_space->array; 1680 uj_lvl_ptr[k] = current_space_lvl->array; 1681 1682 current_space->array += nzk; 1683 current_space->local_used += nzk; 1684 current_space->local_remaining -= nzk; 1685 1686 current_space_lvl->array += nzk; 1687 current_space_lvl->local_used += nzk; 1688 current_space_lvl->local_remaining -= nzk; 1689 1690 ui[k+1] = ui[k] + nzk; 1691 } 1692 1693 #if defined(PETSC_USE_INFO) 1694 if (ai[am] != 0) { 1695 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1696 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1697 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1698 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1699 } else { 1700 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1701 } 1702 #endif 1703 1704 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1705 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1706 ierr = PetscFree(jl);CHKERRQ(ierr); 1707 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1708 1709 /* destroy list of free space and other temporary array(s) */ 1710 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1711 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1712 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1713 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1714 1715 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1716 1717 /* put together the new matrix in MATSEQSBAIJ format */ 1718 1719 b = (Mat_SeqSBAIJ*)(fact)->data; 1720 b->singlemalloc = PETSC_FALSE; 1721 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1722 b->j = uj; 1723 b->i = ui; 1724 b->diag = 0; 1725 b->ilen = 0; 1726 b->imax = 0; 1727 b->row = perm; 1728 b->col = perm; 1729 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1730 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1731 b->icol = iperm; 1732 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1733 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1734 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1735 b->maxnz = b->nz = ui[am]; 1736 b->free_a = PETSC_TRUE; 1737 b->free_ij = PETSC_TRUE; 1738 1739 (fact)->info.factor_mallocs = reallocs; 1740 (fact)->info.fill_ratio_given = fill; 1741 if (ai[am] != 0) { 1742 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1743 } else { 1744 (fact)->info.fill_ratio_needed = 0.0; 1745 } 1746 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1752 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1753 { 1754 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1755 Mat_SeqSBAIJ *b; 1756 PetscErrorCode ierr; 1757 PetscTruth perm_identity; 1758 PetscReal fill = info->fill; 1759 const PetscInt *rip,*riip; 1760 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1761 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1762 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1763 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1764 PetscBT lnkbt; 1765 IS iperm; 1766 1767 PetscFunctionBegin; 1768 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1769 /* check whether perm is the identity mapping */ 1770 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1771 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1772 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1773 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1774 1775 /* initialization */ 1776 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1777 ui[0] = 0; 1778 1779 /* jl: linked list for storing indices of the pivot rows 1780 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1781 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1782 il = jl + am; 1783 cols = il + am; 1784 ui_ptr = (PetscInt**)(cols + am); 1785 for (i=0; i<am; i++){ 1786 jl[i] = am; il[i] = 0; 1787 } 1788 1789 /* create and initialize a linked list for storing column indices of the active row k */ 1790 nlnk = am + 1; 1791 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1792 1793 /* initial FreeSpace size is fill*(ai[am]+1) */ 1794 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1795 current_space = free_space; 1796 1797 for (k=0; k<am; k++){ /* for each active row k */ 1798 /* initialize lnk by the column indices of row rip[k] of A */ 1799 nzk = 0; 1800 ncols = ai[rip[k]+1] - ai[rip[k]]; 1801 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1802 ncols_upper = 0; 1803 for (j=0; j<ncols; j++){ 1804 i = riip[*(aj + ai[rip[k]] + j)]; 1805 if (i >= k){ /* only take upper triangular entry */ 1806 cols[ncols_upper] = i; 1807 ncols_upper++; 1808 } 1809 } 1810 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1811 nzk += nlnk; 1812 1813 /* update lnk by computing fill-in for each pivot row to be merged in */ 1814 prow = jl[k]; /* 1st pivot row */ 1815 1816 while (prow < k){ 1817 nextprow = jl[prow]; 1818 /* merge prow into k-th row */ 1819 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1820 jmax = ui[prow+1]; 1821 ncols = jmax-jmin; 1822 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1823 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1824 nzk += nlnk; 1825 1826 /* update il and jl for prow */ 1827 if (jmin < jmax){ 1828 il[prow] = jmin; 1829 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1830 } 1831 prow = nextprow; 1832 } 1833 1834 /* if free space is not available, make more free space */ 1835 if (current_space->local_remaining<nzk) { 1836 i = am - k + 1; /* num of unfactored rows */ 1837 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1838 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1839 reallocs++; 1840 } 1841 1842 /* copy data into free space, then initialize lnk */ 1843 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1844 1845 /* add the k-th row into il and jl */ 1846 if (nzk-1 > 0){ 1847 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1848 jl[k] = jl[i]; jl[i] = k; 1849 il[k] = ui[k] + 1; 1850 } 1851 ui_ptr[k] = current_space->array; 1852 current_space->array += nzk; 1853 current_space->local_used += nzk; 1854 current_space->local_remaining -= nzk; 1855 1856 ui[k+1] = ui[k] + nzk; 1857 } 1858 1859 #if defined(PETSC_USE_INFO) 1860 if (ai[am] != 0) { 1861 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1862 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1863 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1864 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1865 } else { 1866 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1867 } 1868 #endif 1869 1870 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1871 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1872 ierr = PetscFree(jl);CHKERRQ(ierr); 1873 1874 /* destroy list of free space and other temporary array(s) */ 1875 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1876 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1877 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1878 1879 /* put together the new matrix in MATSEQSBAIJ format */ 1880 1881 b = (Mat_SeqSBAIJ*)(fact)->data; 1882 b->singlemalloc = PETSC_FALSE; 1883 b->free_a = PETSC_TRUE; 1884 b->free_ij = PETSC_TRUE; 1885 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1886 b->j = uj; 1887 b->i = ui; 1888 b->diag = 0; 1889 b->ilen = 0; 1890 b->imax = 0; 1891 b->row = perm; 1892 b->col = perm; 1893 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1894 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1895 b->icol = iperm; 1896 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1897 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1898 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1899 b->maxnz = b->nz = ui[am]; 1900 1901 (fact)->info.factor_mallocs = reallocs; 1902 (fact)->info.fill_ratio_given = fill; 1903 if (ai[am] != 0) { 1904 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1905 } else { 1906 (fact)->info.fill_ratio_needed = 0.0; 1907 } 1908 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1909 PetscFunctionReturn(0); 1910 } 1911