1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->sorted = PETSC_FALSE; 233 b->singlemalloc = PETSC_FALSE; 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 af = ((double)b->nz)/((double)a->nz) + .001; 248 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 250 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 251 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 252 253 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 254 255 PetscFunctionReturn(0); 256 #endif 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 262 { 263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 264 IS isicol; 265 PetscErrorCode ierr; 266 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 267 PetscInt *bi,*bj,*ajtmp; 268 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 269 PetscReal f; 270 PetscInt nlnk,*lnk,k,**bi_ptr; 271 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 272 PetscBT lnkbt; 273 274 PetscFunctionBegin; 275 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 276 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279 280 /* get new row pointers */ 281 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 282 bi[0] = 0; 283 284 /* bdiag is location of diagonal in factor */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 286 bdiag[0] = 0; 287 288 /* linked list for storing column indices of the active row */ 289 nlnk = n + 1; 290 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 292 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 293 bi_ptr = (PetscInt**)(im + n); 294 295 /* initial FreeSpace size is f*(ai[n]+1) */ 296 f = info->fill; 297 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 298 current_space = free_space; 299 300 for (i=0; i<n; i++) { 301 /* copy previous fill into linked list */ 302 nzi = 0; 303 nnz = ai[r[i]+1] - ai[r[i]]; 304 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 305 ajtmp = aj + ai[r[i]]; 306 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 307 nzi += nlnk; 308 309 /* add pivot rows into linked list */ 310 row = lnk[n]; 311 while (row < i) { 312 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 313 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 314 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 315 nzi += nlnk; 316 row = lnk[row]; 317 } 318 bi[i+1] = bi[i] + nzi; 319 im[i] = nzi; 320 321 /* mark bdiag */ 322 nzbd = 0; 323 nnz = nzi; 324 k = lnk[n]; 325 while (nnz-- && k < i){ 326 nzbd++; 327 k = lnk[k]; 328 } 329 bdiag[i] = bi[i] + nzbd; 330 331 /* if free space is not available, make more free space */ 332 if (current_space->local_remaining<nzi) { 333 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 334 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 335 reallocs++; 336 } 337 338 /* copy data into free space, then initialize lnk */ 339 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 340 bi_ptr[i] = current_space->array; 341 current_space->array += nzi; 342 current_space->local_used += nzi; 343 current_space->local_remaining -= nzi; 344 } 345 #if defined(PETSC_USE_INFO) 346 if (ai[n] != 0) { 347 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 348 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 349 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 350 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 351 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 352 } else { 353 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 354 } 355 #endif 356 357 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 358 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 359 360 /* destroy list of free space and other temporary array(s) */ 361 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 362 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 363 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 364 ierr = PetscFree(im);CHKERRQ(ierr); 365 366 /* put together the new matrix */ 367 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 368 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 369 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 370 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 371 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 372 b = (Mat_SeqAIJ*)(*B)->data; 373 b->free_a = PETSC_TRUE; 374 b->free_ij = PETSC_TRUE; 375 b->singlemalloc = PETSC_FALSE; 376 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 377 b->j = bj; 378 b->i = bi; 379 b->diag = bdiag; 380 b->ilen = 0; 381 b->imax = 0; 382 b->row = isrow; 383 b->col = iscol; 384 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 385 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 386 b->icol = isicol; 387 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 388 389 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 390 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 391 b->maxnz = b->nz = bi[n] ; 392 393 (*B)->factor = FACTOR_LU; 394 (*B)->info.factor_mallocs = reallocs; 395 (*B)->info.fill_ratio_given = f; 396 397 if (ai[n] != 0) { 398 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 399 } else { 400 (*B)->info.fill_ratio_needed = 0.0; 401 } 402 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 403 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 404 PetscFunctionReturn(0); 405 } 406 407 /* 408 Trouble in factorization, should we dump the original matrix? 409 */ 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatFactorDumpMatrix" 412 PetscErrorCode MatFactorDumpMatrix(Mat A) 413 { 414 PetscErrorCode ierr; 415 PetscTruth flg; 416 417 PetscFunctionBegin; 418 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 419 if (flg) { 420 PetscViewer viewer; 421 char filename[PETSC_MAX_PATH_LEN]; 422 423 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 424 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 425 ierr = MatView(A,viewer);CHKERRQ(ierr); 426 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 427 } 428 PetscFunctionReturn(0); 429 } 430 431 /* ----------------------------------------------------------- */ 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 434 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 435 { 436 Mat C=*B; 437 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 438 IS isrow = b->row,isicol = b->icol; 439 PetscErrorCode ierr; 440 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 441 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 442 PetscInt *diag_offset = b->diag,diag,*pj; 443 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 444 PetscScalar d; 445 PetscReal rs; 446 LUShift_Ctx sctx; 447 PetscInt newshift,*ddiag; 448 449 PetscFunctionBegin; 450 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 451 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 452 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 453 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 454 rtmps = rtmp; ics = ic; 455 456 sctx.shift_top = 0; 457 sctx.nshift_max = 0; 458 sctx.shift_lo = 0; 459 sctx.shift_hi = 0; 460 461 /* if both shift schemes are chosen by user, only use info->shiftpd */ 462 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 463 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 464 PetscInt *aai = a->i; 465 ddiag = a->diag; 466 sctx.shift_top = 0; 467 for (i=0; i<n; i++) { 468 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 469 d = (a->a)[ddiag[i]]; 470 rs = -PetscAbsScalar(d) - PetscRealPart(d); 471 v = a->a+aai[i]; 472 nz = aai[i+1] - aai[i]; 473 for (j=0; j<nz; j++) 474 rs += PetscAbsScalar(v[j]); 475 if (rs>sctx.shift_top) sctx.shift_top = rs; 476 } 477 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 478 sctx.shift_top *= 1.1; 479 sctx.nshift_max = 5; 480 sctx.shift_lo = 0.; 481 sctx.shift_hi = 1.; 482 } 483 484 sctx.shift_amount = 0; 485 sctx.nshift = 0; 486 do { 487 sctx.lushift = PETSC_FALSE; 488 for (i=0; i<n; i++){ 489 nz = bi[i+1] - bi[i]; 490 bjtmp = bj + bi[i]; 491 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 492 493 /* load in initial (unfactored row) */ 494 nz = a->i[r[i]+1] - a->i[r[i]]; 495 ajtmp = a->j + a->i[r[i]]; 496 v = a->a + a->i[r[i]]; 497 for (j=0; j<nz; j++) { 498 rtmp[ics[ajtmp[j]]] = v[j]; 499 } 500 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 501 502 row = *bjtmp++; 503 while (row < i) { 504 pc = rtmp + row; 505 if (*pc != 0.0) { 506 pv = b->a + diag_offset[row]; 507 pj = b->j + diag_offset[row] + 1; 508 multiplier = *pc / *pv++; 509 *pc = multiplier; 510 nz = bi[row+1] - diag_offset[row] - 1; 511 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 512 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 513 } 514 row = *bjtmp++; 515 } 516 /* finished row so stick it into b->a */ 517 pv = b->a + bi[i] ; 518 pj = b->j + bi[i] ; 519 nz = bi[i+1] - bi[i]; 520 diag = diag_offset[i] - bi[i]; 521 rs = 0.0; 522 for (j=0; j<nz; j++) { 523 pv[j] = rtmps[pj[j]]; 524 if (j != diag) rs += PetscAbsScalar(pv[j]); 525 } 526 527 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 528 sctx.rs = rs; 529 sctx.pv = pv[diag]; 530 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 531 if (newshift == 1) break; 532 } 533 534 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 535 /* 536 * if no shift in this attempt & shifting & started shifting & can refine, 537 * then try lower shift 538 */ 539 sctx.shift_hi = info->shift_fraction; 540 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 541 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 542 sctx.lushift = PETSC_TRUE; 543 sctx.nshift++; 544 } 545 } while (sctx.lushift); 546 547 /* invert diagonal entries for simplier triangular solves */ 548 for (i=0; i<n; i++) { 549 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 550 } 551 552 ierr = PetscFree(rtmp);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 554 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 555 C->factor = FACTOR_LU; 556 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 557 C->assembled = PETSC_TRUE; 558 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 559 if (sctx.nshift){ 560 if (info->shiftnz) { 561 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 562 } else if (info->shiftpd) { 563 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 564 } 565 } 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 571 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 572 { 573 PetscFunctionBegin; 574 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 575 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 576 PetscFunctionReturn(0); 577 } 578 579 580 /* ----------------------------------------------------------- */ 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatLUFactor_SeqAIJ" 583 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 584 { 585 PetscErrorCode ierr; 586 Mat C; 587 588 PetscFunctionBegin; 589 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 590 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 591 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 592 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 /* ----------------------------------------------------------- */ 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatSolve_SeqAIJ" 598 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 599 { 600 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 601 IS iscol = a->col,isrow = a->row; 602 PetscErrorCode ierr; 603 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 604 PetscInt nz,*rout,*cout; 605 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 606 607 PetscFunctionBegin; 608 if (!n) PetscFunctionReturn(0); 609 610 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 611 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 612 tmp = a->solve_work; 613 614 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 615 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 616 617 /* forward solve the lower triangular */ 618 tmp[0] = b[*r++]; 619 tmps = tmp; 620 for (i=1; i<n; i++) { 621 v = aa + ai[i] ; 622 vi = aj + ai[i] ; 623 nz = a->diag[i] - ai[i]; 624 sum = b[*r++]; 625 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 626 tmp[i] = sum; 627 } 628 629 /* backward solve the upper triangular */ 630 for (i=n-1; i>=0; i--){ 631 v = aa + a->diag[i] + 1; 632 vi = aj + a->diag[i] + 1; 633 nz = ai[i+1] - a->diag[i] - 1; 634 sum = tmp[i]; 635 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 636 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 637 } 638 639 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 640 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 641 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 642 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 643 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatMatSolve_SeqAIJ" 649 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 650 { 651 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 652 IS iscol = a->col,isrow = a->row; 653 PetscErrorCode ierr; 654 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 655 PetscInt nz,*rout,*cout,neq; 656 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 657 658 PetscFunctionBegin; 659 if (!n) PetscFunctionReturn(0); 660 661 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 662 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 663 664 tmp = a->solve_work; 665 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 666 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 667 668 for (neq=0; neq<n; neq++){ 669 /* forward solve the lower triangular */ 670 tmp[0] = b[r[0]]; 671 tmps = tmp; 672 for (i=1; i<n; i++) { 673 v = aa + ai[i] ; 674 vi = aj + ai[i] ; 675 nz = a->diag[i] - ai[i]; 676 sum = b[r[i]]; 677 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 678 tmp[i] = sum; 679 } 680 /* backward solve the upper triangular */ 681 for (i=n-1; i>=0; i--){ 682 v = aa + a->diag[i] + 1; 683 vi = aj + a->diag[i] + 1; 684 nz = ai[i+1] - a->diag[i] - 1; 685 sum = tmp[i]; 686 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 687 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 688 } 689 690 b += n; 691 x += n; 692 } 693 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 694 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 695 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 696 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 697 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 /* ----------------------------------------------------------- */ 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 704 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 705 { 706 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 707 PetscErrorCode ierr; 708 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 709 PetscScalar *x,*b,*aa = a->a; 710 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 711 PetscInt adiag_i,i,*vi,nz,ai_i; 712 PetscScalar *v,sum; 713 #endif 714 715 PetscFunctionBegin; 716 if (!n) PetscFunctionReturn(0); 717 718 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 719 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 720 721 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 722 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 723 #else 724 /* forward solve the lower triangular */ 725 x[0] = b[0]; 726 for (i=1; i<n; i++) { 727 ai_i = ai[i]; 728 v = aa + ai_i; 729 vi = aj + ai_i; 730 nz = adiag[i] - ai_i; 731 sum = b[i]; 732 while (nz--) sum -= *v++ * x[*vi++]; 733 x[i] = sum; 734 } 735 736 /* backward solve the upper triangular */ 737 for (i=n-1; i>=0; i--){ 738 adiag_i = adiag[i]; 739 v = aa + adiag_i + 1; 740 vi = aj + adiag_i + 1; 741 nz = ai[i+1] - adiag_i - 1; 742 sum = x[i]; 743 while (nz--) sum -= *v++ * x[*vi++]; 744 x[i] = sum*aa[adiag_i]; 745 } 746 #endif 747 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 748 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 755 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 756 { 757 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 758 IS iscol = a->col,isrow = a->row; 759 PetscErrorCode ierr; 760 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 761 PetscInt nz,*rout,*cout; 762 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 763 764 PetscFunctionBegin; 765 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 766 767 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 768 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 769 tmp = a->solve_work; 770 771 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 772 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 773 774 /* forward solve the lower triangular */ 775 tmp[0] = b[*r++]; 776 for (i=1; i<n; i++) { 777 v = aa + ai[i] ; 778 vi = aj + ai[i] ; 779 nz = a->diag[i] - ai[i]; 780 sum = b[*r++]; 781 while (nz--) sum -= *v++ * tmp[*vi++ ]; 782 tmp[i] = sum; 783 } 784 785 /* backward solve the upper triangular */ 786 for (i=n-1; i>=0; i--){ 787 v = aa + a->diag[i] + 1; 788 vi = aj + a->diag[i] + 1; 789 nz = ai[i+1] - a->diag[i] - 1; 790 sum = tmp[i]; 791 while (nz--) sum -= *v++ * tmp[*vi++ ]; 792 tmp[i] = sum*aa[a->diag[i]]; 793 x[*c--] += tmp[i]; 794 } 795 796 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 797 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 798 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 801 802 PetscFunctionReturn(0); 803 } 804 /* -------------------------------------------------------------------*/ 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 807 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 808 { 809 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 810 IS iscol = a->col,isrow = a->row; 811 PetscErrorCode ierr; 812 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 813 PetscInt nz,*rout,*cout,*diag = a->diag; 814 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 815 816 PetscFunctionBegin; 817 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 tmp = a->solve_work; 820 821 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 822 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 823 824 /* copy the b into temp work space according to permutation */ 825 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 826 827 /* forward solve the U^T */ 828 for (i=0; i<n; i++) { 829 v = aa + diag[i] ; 830 vi = aj + diag[i] + 1; 831 nz = ai[i+1] - diag[i] - 1; 832 s1 = tmp[i]; 833 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 834 while (nz--) { 835 tmp[*vi++ ] -= (*v++)*s1; 836 } 837 tmp[i] = s1; 838 } 839 840 /* backward solve the L^T */ 841 for (i=n-1; i>=0; i--){ 842 v = aa + diag[i] - 1 ; 843 vi = aj + diag[i] - 1 ; 844 nz = diag[i] - ai[i]; 845 s1 = tmp[i]; 846 while (nz--) { 847 tmp[*vi-- ] -= (*v--)*s1; 848 } 849 } 850 851 /* copy tmp into x according to permutation */ 852 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 853 854 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 855 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 856 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 857 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858 859 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 865 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 866 { 867 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 868 IS iscol = a->col,isrow = a->row; 869 PetscErrorCode ierr; 870 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 871 PetscInt nz,*rout,*cout,*diag = a->diag; 872 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 873 874 PetscFunctionBegin; 875 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 876 877 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 878 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 879 tmp = a->solve_work; 880 881 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 882 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 883 884 /* copy the b into temp work space according to permutation */ 885 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 886 887 /* forward solve the U^T */ 888 for (i=0; i<n; i++) { 889 v = aa + diag[i] ; 890 vi = aj + diag[i] + 1; 891 nz = ai[i+1] - diag[i] - 1; 892 tmp[i] *= *v++; 893 while (nz--) { 894 tmp[*vi++ ] -= (*v++)*tmp[i]; 895 } 896 } 897 898 /* backward solve the L^T */ 899 for (i=n-1; i>=0; i--){ 900 v = aa + diag[i] - 1 ; 901 vi = aj + diag[i] - 1 ; 902 nz = diag[i] - ai[i]; 903 while (nz--) { 904 tmp[*vi-- ] -= (*v--)*tmp[i]; 905 } 906 } 907 908 /* copy tmp into x according to permutation */ 909 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 910 911 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 912 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 913 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 914 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 915 916 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 /* ----------------------------------------------------------------*/ 920 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*); 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 924 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 925 { 926 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 927 IS isicol; 928 PetscErrorCode ierr; 929 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 930 PetscInt *bi,*cols,nnz,*cols_lvl; 931 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 932 PetscInt i,levels,diagonal_fill; 933 PetscTruth col_identity,row_identity; 934 PetscReal f; 935 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 936 PetscBT lnkbt; 937 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 938 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 939 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 940 PetscTruth missing; 941 942 PetscFunctionBegin; 943 f = info->fill; 944 levels = (PetscInt)info->levels; 945 diagonal_fill = (PetscInt)info->diagonal_fill; 946 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 947 948 /* special case that simply copies fill pattern */ 949 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 950 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 951 if (!levels && row_identity && col_identity) { 952 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 953 (*fact)->factor = FACTOR_LU; 954 (*fact)->info.factor_mallocs = 0; 955 (*fact)->info.fill_ratio_given = info->fill; 956 (*fact)->info.fill_ratio_needed = 1.0; 957 b = (Mat_SeqAIJ*)(*fact)->data; 958 ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr); 959 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 960 b->row = isrow; 961 b->col = iscol; 962 b->icol = isicol; 963 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 964 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 965 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 966 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 971 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 972 973 /* get new row pointers */ 974 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 975 bi[0] = 0; 976 /* bdiag is location of diagonal in factor */ 977 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 978 bdiag[0] = 0; 979 980 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 981 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 982 983 /* create a linked list for storing column indices of the active row */ 984 nlnk = n + 1; 985 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 986 987 /* initial FreeSpace size is f*(ai[n]+1) */ 988 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 989 current_space = free_space; 990 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 991 current_space_lvl = free_space_lvl; 992 993 for (i=0; i<n; i++) { 994 nzi = 0; 995 /* copy current row into linked list */ 996 nnz = ai[r[i]+1] - ai[r[i]]; 997 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 998 cols = aj + ai[r[i]]; 999 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1000 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1001 nzi += nlnk; 1002 1003 /* make sure diagonal entry is included */ 1004 if (diagonal_fill && lnk[i] == -1) { 1005 fm = n; 1006 while (lnk[fm] < i) fm = lnk[fm]; 1007 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1008 lnk[fm] = i; 1009 lnk_lvl[i] = 0; 1010 nzi++; dcount++; 1011 } 1012 1013 /* add pivot rows into the active row */ 1014 nzbd = 0; 1015 prow = lnk[n]; 1016 while (prow < i) { 1017 nnz = bdiag[prow]; 1018 cols = bj_ptr[prow] + nnz + 1; 1019 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1020 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1021 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1022 nzi += nlnk; 1023 prow = lnk[prow]; 1024 nzbd++; 1025 } 1026 bdiag[i] = nzbd; 1027 bi[i+1] = bi[i] + nzi; 1028 1029 /* if free space is not available, make more free space */ 1030 if (current_space->local_remaining<nzi) { 1031 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1032 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1033 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1034 reallocs++; 1035 } 1036 1037 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1038 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1039 bj_ptr[i] = current_space->array; 1040 bjlvl_ptr[i] = current_space_lvl->array; 1041 1042 /* make sure the active row i has diagonal entry */ 1043 if (*(bj_ptr[i]+bdiag[i]) != i) { 1044 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1045 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1046 } 1047 1048 current_space->array += nzi; 1049 current_space->local_used += nzi; 1050 current_space->local_remaining -= nzi; 1051 current_space_lvl->array += nzi; 1052 current_space_lvl->local_used += nzi; 1053 current_space_lvl->local_remaining -= nzi; 1054 } 1055 1056 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1057 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1058 1059 /* destroy list of free space and other temporary arrays */ 1060 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1061 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1062 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1063 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1064 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1065 1066 #if defined(PETSC_USE_INFO) 1067 { 1068 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1069 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1070 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1071 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1072 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1073 if (diagonal_fill) { 1074 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1075 } 1076 } 1077 #endif 1078 1079 /* put together the new matrix */ 1080 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1081 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1082 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1083 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1084 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1085 b = (Mat_SeqAIJ*)(*fact)->data; 1086 b->free_a = PETSC_TRUE; 1087 b->free_ij = PETSC_TRUE; 1088 b->singlemalloc = PETSC_FALSE; 1089 len = (bi[n] )*sizeof(PetscScalar); 1090 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1091 b->j = bj; 1092 b->i = bi; 1093 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1094 b->diag = bdiag; 1095 b->ilen = 0; 1096 b->imax = 0; 1097 b->row = isrow; 1098 b->col = iscol; 1099 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1100 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1101 b->icol = isicol; 1102 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1103 /* In b structure: Free imax, ilen, old a, old j. 1104 Allocate bdiag, solve_work, new a, new j */ 1105 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1106 b->maxnz = b->nz = bi[n] ; 1107 (*fact)->factor = FACTOR_LU; 1108 (*fact)->info.factor_mallocs = reallocs; 1109 (*fact)->info.fill_ratio_given = f; 1110 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1111 1112 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1113 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1114 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #include "src/mat/impls/sbaij/seq/sbaij.h" 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1121 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1122 { 1123 Mat C = *B; 1124 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1125 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1126 IS ip=b->row; 1127 PetscErrorCode ierr; 1128 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1129 PetscInt *ai=a->i,*aj=a->j; 1130 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1131 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1132 PetscReal zeropivot,rs,shiftnz; 1133 PetscReal shiftpd; 1134 ChShift_Ctx sctx; 1135 PetscInt newshift; 1136 1137 PetscFunctionBegin; 1138 shiftnz = info->shiftnz; 1139 shiftpd = info->shiftpd; 1140 zeropivot = info->zeropivot; 1141 1142 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1143 1144 /* initialization */ 1145 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1146 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1147 jl = il + mbs; 1148 rtmp = (MatScalar*)(jl + mbs); 1149 1150 sctx.shift_amount = 0; 1151 sctx.nshift = 0; 1152 do { 1153 sctx.chshift = PETSC_FALSE; 1154 for (i=0; i<mbs; i++) { 1155 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1156 } 1157 1158 for (k = 0; k<mbs; k++){ 1159 bval = ba + bi[k]; 1160 /* initialize k-th row by the perm[k]-th row of A */ 1161 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1162 for (j = jmin; j < jmax; j++){ 1163 col = rip[aj[j]]; 1164 if (col >= k){ /* only take upper triangular entry */ 1165 rtmp[col] = aa[j]; 1166 *bval++ = 0.0; /* for in-place factorization */ 1167 } 1168 } 1169 /* shift the diagonal of the matrix */ 1170 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1171 1172 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1173 dk = rtmp[k]; 1174 i = jl[k]; /* first row to be added to k_th row */ 1175 1176 while (i < k){ 1177 nexti = jl[i]; /* next row to be added to k_th row */ 1178 1179 /* compute multiplier, update diag(k) and U(i,k) */ 1180 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1181 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1182 dk += uikdi*ba[ili]; 1183 ba[ili] = uikdi; /* -U(i,k) */ 1184 1185 /* add multiple of row i to k-th row */ 1186 jmin = ili + 1; jmax = bi[i+1]; 1187 if (jmin < jmax){ 1188 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1189 /* update il and jl for row i */ 1190 il[i] = jmin; 1191 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1192 } 1193 i = nexti; 1194 } 1195 1196 /* shift the diagonals when zero pivot is detected */ 1197 /* compute rs=sum of abs(off-diagonal) */ 1198 rs = 0.0; 1199 jmin = bi[k]+1; 1200 nz = bi[k+1] - jmin; 1201 if (nz){ 1202 bcol = bj + jmin; 1203 while (nz--){ 1204 rs += PetscAbsScalar(rtmp[*bcol]); 1205 bcol++; 1206 } 1207 } 1208 1209 sctx.rs = rs; 1210 sctx.pv = dk; 1211 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1212 if (newshift == 1) break; 1213 1214 /* copy data into U(k,:) */ 1215 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1216 jmin = bi[k]+1; jmax = bi[k+1]; 1217 if (jmin < jmax) { 1218 for (j=jmin; j<jmax; j++){ 1219 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1220 } 1221 /* add the k-th row into il and jl */ 1222 il[k] = jmin; 1223 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1224 } 1225 } 1226 } while (sctx.chshift); 1227 ierr = PetscFree(il);CHKERRQ(ierr); 1228 1229 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1230 C->factor = FACTOR_CHOLESKY; 1231 C->assembled = PETSC_TRUE; 1232 C->preallocated = PETSC_TRUE; 1233 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1234 if (sctx.nshift){ 1235 if (shiftnz) { 1236 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1237 } else if (shiftpd) { 1238 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1239 } 1240 } 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1246 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1247 { 1248 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1249 Mat_SeqSBAIJ *b; 1250 Mat B; 1251 PetscErrorCode ierr; 1252 PetscTruth perm_identity; 1253 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1254 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1255 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1256 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1257 PetscReal fill=info->fill,levels=info->levels; 1258 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1259 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1260 PetscBT lnkbt; 1261 1262 PetscFunctionBegin; 1263 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1264 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1265 1266 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1267 ui[0] = 0; 1268 1269 /* special case that simply copies fill pattern */ 1270 if (!levels && perm_identity) { 1271 for (i=0; i<am; i++) { 1272 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1273 } 1274 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1275 cols = uj; 1276 for (i=0; i<am; i++) { 1277 aj = a->j + a->diag[i]; 1278 ncols = ui[i+1] - ui[i]; 1279 for (j=0; j<ncols; j++) *cols++ = *aj++; 1280 } 1281 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1282 /* initialization */ 1283 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1284 1285 /* jl: linked list for storing indices of the pivot rows 1286 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1287 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1288 il = jl + am; 1289 uj_ptr = (PetscInt**)(il + am); 1290 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1291 for (i=0; i<am; i++){ 1292 jl[i] = am; il[i] = 0; 1293 } 1294 1295 /* create and initialize a linked list for storing column indices of the active row k */ 1296 nlnk = am + 1; 1297 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1298 1299 /* initial FreeSpace size is fill*(ai[am]+1) */ 1300 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1301 current_space = free_space; 1302 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1303 current_space_lvl = free_space_lvl; 1304 1305 for (k=0; k<am; k++){ /* for each active row k */ 1306 /* initialize lnk by the column indices of row rip[k] of A */ 1307 nzk = 0; 1308 ncols = ai[rip[k]+1] - ai[rip[k]]; 1309 ncols_upper = 0; 1310 for (j=0; j<ncols; j++){ 1311 i = *(aj + ai[rip[k]] + j); 1312 if (rip[i] >= k){ /* only take upper triangular entry */ 1313 ajtmp[ncols_upper] = i; 1314 ncols_upper++; 1315 } 1316 } 1317 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1318 nzk += nlnk; 1319 1320 /* update lnk by computing fill-in for each pivot row to be merged in */ 1321 prow = jl[k]; /* 1st pivot row */ 1322 1323 while (prow < k){ 1324 nextprow = jl[prow]; 1325 1326 /* merge prow into k-th row */ 1327 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1328 jmax = ui[prow+1]; 1329 ncols = jmax-jmin; 1330 i = jmin - ui[prow]; 1331 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1332 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1333 j = *(uj - 1); 1334 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1335 nzk += nlnk; 1336 1337 /* update il and jl for prow */ 1338 if (jmin < jmax){ 1339 il[prow] = jmin; 1340 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1341 } 1342 prow = nextprow; 1343 } 1344 1345 /* if free space is not available, make more free space */ 1346 if (current_space->local_remaining<nzk) { 1347 i = am - k + 1; /* num of unfactored rows */ 1348 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1349 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1350 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1351 reallocs++; 1352 } 1353 1354 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1355 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1356 1357 /* add the k-th row into il and jl */ 1358 if (nzk > 1){ 1359 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1360 jl[k] = jl[i]; jl[i] = k; 1361 il[k] = ui[k] + 1; 1362 } 1363 uj_ptr[k] = current_space->array; 1364 uj_lvl_ptr[k] = current_space_lvl->array; 1365 1366 current_space->array += nzk; 1367 current_space->local_used += nzk; 1368 current_space->local_remaining -= nzk; 1369 1370 current_space_lvl->array += nzk; 1371 current_space_lvl->local_used += nzk; 1372 current_space_lvl->local_remaining -= nzk; 1373 1374 ui[k+1] = ui[k] + nzk; 1375 } 1376 1377 #if defined(PETSC_USE_INFO) 1378 if (ai[am] != 0) { 1379 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1380 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1381 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1382 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1383 } else { 1384 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1385 } 1386 #endif 1387 1388 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1389 ierr = PetscFree(jl);CHKERRQ(ierr); 1390 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1391 1392 /* destroy list of free space and other temporary array(s) */ 1393 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1394 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1395 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1396 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1397 1398 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1399 1400 /* put together the new matrix in MATSEQSBAIJ format */ 1401 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1402 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1403 B = *fact; 1404 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1405 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1406 1407 b = (Mat_SeqSBAIJ*)B->data; 1408 b->singlemalloc = PETSC_FALSE; 1409 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1410 b->j = uj; 1411 b->i = ui; 1412 b->diag = 0; 1413 b->ilen = 0; 1414 b->imax = 0; 1415 b->row = perm; 1416 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1417 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1418 b->icol = perm; 1419 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1420 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1421 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1422 b->maxnz = b->nz = ui[am]; 1423 b->free_a = PETSC_TRUE; 1424 b->free_ij = PETSC_TRUE; 1425 1426 B->factor = FACTOR_CHOLESKY; 1427 B->info.factor_mallocs = reallocs; 1428 B->info.fill_ratio_given = fill; 1429 if (ai[am] != 0) { 1430 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1431 } else { 1432 B->info.fill_ratio_needed = 0.0; 1433 } 1434 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1435 if (perm_identity){ 1436 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1438 } 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1444 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1445 { 1446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1447 Mat_SeqSBAIJ *b; 1448 Mat B; 1449 PetscErrorCode ierr; 1450 PetscTruth perm_identity; 1451 PetscReal fill = info->fill; 1452 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1453 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1454 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1455 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1456 PetscBT lnkbt; 1457 IS iperm; 1458 1459 PetscFunctionBegin; 1460 /* check whether perm is the identity mapping */ 1461 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1462 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1463 1464 if (!perm_identity){ 1465 /* check if perm is symmetric! */ 1466 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1467 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1468 for (i=0; i<am; i++) { 1469 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1470 } 1471 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1472 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1473 } 1474 1475 /* initialization */ 1476 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1477 ui[0] = 0; 1478 1479 /* jl: linked list for storing indices of the pivot rows 1480 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1481 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1482 il = jl + am; 1483 cols = il + am; 1484 ui_ptr = (PetscInt**)(cols + am); 1485 for (i=0; i<am; i++){ 1486 jl[i] = am; il[i] = 0; 1487 } 1488 1489 /* create and initialize a linked list for storing column indices of the active row k */ 1490 nlnk = am + 1; 1491 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1492 1493 /* initial FreeSpace size is fill*(ai[am]+1) */ 1494 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1495 current_space = free_space; 1496 1497 for (k=0; k<am; k++){ /* for each active row k */ 1498 /* initialize lnk by the column indices of row rip[k] of A */ 1499 nzk = 0; 1500 ncols = ai[rip[k]+1] - ai[rip[k]]; 1501 ncols_upper = 0; 1502 for (j=0; j<ncols; j++){ 1503 i = rip[*(aj + ai[rip[k]] + j)]; 1504 if (i >= k){ /* only take upper triangular entry */ 1505 cols[ncols_upper] = i; 1506 ncols_upper++; 1507 } 1508 } 1509 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1510 nzk += nlnk; 1511 1512 /* update lnk by computing fill-in for each pivot row to be merged in */ 1513 prow = jl[k]; /* 1st pivot row */ 1514 1515 while (prow < k){ 1516 nextprow = jl[prow]; 1517 /* merge prow into k-th row */ 1518 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1519 jmax = ui[prow+1]; 1520 ncols = jmax-jmin; 1521 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1522 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1523 nzk += nlnk; 1524 1525 /* update il and jl for prow */ 1526 if (jmin < jmax){ 1527 il[prow] = jmin; 1528 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1529 } 1530 prow = nextprow; 1531 } 1532 1533 /* if free space is not available, make more free space */ 1534 if (current_space->local_remaining<nzk) { 1535 i = am - k + 1; /* num of unfactored rows */ 1536 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1537 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1538 reallocs++; 1539 } 1540 1541 /* copy data into free space, then initialize lnk */ 1542 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1543 1544 /* add the k-th row into il and jl */ 1545 if (nzk-1 > 0){ 1546 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1547 jl[k] = jl[i]; jl[i] = k; 1548 il[k] = ui[k] + 1; 1549 } 1550 ui_ptr[k] = current_space->array; 1551 current_space->array += nzk; 1552 current_space->local_used += nzk; 1553 current_space->local_remaining -= nzk; 1554 1555 ui[k+1] = ui[k] + nzk; 1556 } 1557 1558 #if defined(PETSC_USE_INFO) 1559 if (ai[am] != 0) { 1560 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1561 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1562 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1563 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1564 } else { 1565 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1566 } 1567 #endif 1568 1569 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1570 ierr = PetscFree(jl);CHKERRQ(ierr); 1571 1572 /* destroy list of free space and other temporary array(s) */ 1573 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1574 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1575 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1576 1577 /* put together the new matrix in MATSEQSBAIJ format */ 1578 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1579 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1580 B = *fact; 1581 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1582 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1583 1584 b = (Mat_SeqSBAIJ*)B->data; 1585 b->singlemalloc = PETSC_FALSE; 1586 b->free_a = PETSC_TRUE; 1587 b->free_ij = PETSC_TRUE; 1588 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1589 b->j = uj; 1590 b->i = ui; 1591 b->diag = 0; 1592 b->ilen = 0; 1593 b->imax = 0; 1594 b->row = perm; 1595 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1596 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1597 b->icol = perm; 1598 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1599 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1600 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1601 b->maxnz = b->nz = ui[am]; 1602 1603 B->factor = FACTOR_CHOLESKY; 1604 B->info.factor_mallocs = reallocs; 1605 B->info.fill_ratio_given = fill; 1606 if (ai[am] != 0) { 1607 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1608 } else { 1609 B->info.fill_ratio_needed = 0.0; 1610 } 1611 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1612 if (perm_identity){ 1613 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1614 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1615 } 1616 PetscFunctionReturn(0); 1617 } 1618