1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->rmap.n; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->free_a = PETSC_TRUE; 233 b->free_ij = PETSC_TRUE; 234 b->sorted = PETSC_FALSE; 235 b->singlemalloc = PETSC_FALSE; 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 245 b->maxnz = b->nz = new_i[n]; 246 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 247 (*fact)->info.factor_mallocs = 0; 248 249 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 253 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 254 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 255 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 256 257 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 265 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 266 { 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 268 IS isicol; 269 PetscErrorCode ierr; 270 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 271 PetscInt *bi,*bj,*ajtmp; 272 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 273 PetscReal f; 274 PetscInt nlnk,*lnk,k,**bi_ptr; 275 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 276 PetscBT lnkbt; 277 278 PetscFunctionBegin; 279 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 280 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 281 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 282 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 283 284 /* get new row pointers */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 286 bi[0] = 0; 287 288 /* bdiag is location of diagonal in factor */ 289 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 290 bdiag[0] = 0; 291 292 /* linked list for storing column indices of the active row */ 293 nlnk = n + 1; 294 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 295 296 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 297 bi_ptr = (PetscInt**)(im + n); 298 299 /* initial FreeSpace size is f*(ai[n]+1) */ 300 f = info->fill; 301 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 317 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 318 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 319 nzi += nlnk; 320 row = lnk[row]; 321 } 322 bi[i+1] = bi[i] + nzi; 323 im[i] = nzi; 324 325 /* mark bdiag */ 326 nzbd = 0; 327 nnz = nzi; 328 k = lnk[n]; 329 while (nnz-- && k < i){ 330 nzbd++; 331 k = lnk[k]; 332 } 333 bdiag[i] = bi[i] + nzbd; 334 335 /* if free space is not available, make more free space */ 336 if (current_space->local_remaining<nzi) { 337 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 338 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 339 reallocs++; 340 } 341 342 /* copy data into free space, then initialize lnk */ 343 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 344 bi_ptr[i] = current_space->array; 345 current_space->array += nzi; 346 current_space->local_used += nzi; 347 current_space->local_remaining -= nzi; 348 } 349 #if defined(PETSC_USE_INFO) 350 if (ai[n] != 0) { 351 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 352 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 353 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 354 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 355 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 356 } else { 357 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 358 } 359 #endif 360 361 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 362 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 363 364 /* destroy list of free space and other temporary array(s) */ 365 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 366 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 367 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 368 ierr = PetscFree(im);CHKERRQ(ierr); 369 370 /* put together the new matrix */ 371 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 372 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 373 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 374 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 375 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 376 b = (Mat_SeqAIJ*)(*B)->data; 377 b->free_a = PETSC_TRUE; 378 b->free_ij = PETSC_TRUE; 379 b->singlemalloc = PETSC_FALSE; 380 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 381 b->j = bj; 382 b->i = bi; 383 b->diag = bdiag; 384 b->ilen = 0; 385 b->imax = 0; 386 b->row = isrow; 387 b->col = iscol; 388 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 389 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 390 b->icol = isicol; 391 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 392 393 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 394 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 395 b->maxnz = b->nz = bi[n] ; 396 397 (*B)->factor = FACTOR_LU; 398 (*B)->info.factor_mallocs = reallocs; 399 (*B)->info.fill_ratio_given = f; 400 401 if (ai[n] != 0) { 402 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 403 } else { 404 (*B)->info.fill_ratio_needed = 0.0; 405 } 406 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 407 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 408 PetscFunctionReturn(0); 409 } 410 411 /* 412 Trouble in factorization, should we dump the original matrix? 413 */ 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatFactorDumpMatrix" 416 PetscErrorCode MatFactorDumpMatrix(Mat A) 417 { 418 PetscErrorCode ierr; 419 PetscTruth flg; 420 421 PetscFunctionBegin; 422 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 423 if (flg) { 424 PetscViewer viewer; 425 char filename[PETSC_MAX_PATH_LEN]; 426 427 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 428 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 429 ierr = MatView(A,viewer);CHKERRQ(ierr); 430 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 431 } 432 PetscFunctionReturn(0); 433 } 434 435 /* ----------------------------------------------------------- */ 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 438 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 439 { 440 Mat C=*B; 441 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 442 IS isrow = b->row,isicol = b->icol; 443 PetscErrorCode ierr; 444 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 445 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 446 PetscInt *diag_offset = b->diag,diag,*pj; 447 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 448 PetscScalar d; 449 PetscReal rs; 450 LUShift_Ctx sctx; 451 PetscInt newshift,*ddiag; 452 453 PetscFunctionBegin; 454 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 455 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 456 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 457 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 458 rtmps = rtmp; ics = ic; 459 460 sctx.shift_top = 0; 461 sctx.nshift_max = 0; 462 sctx.shift_lo = 0; 463 sctx.shift_hi = 0; 464 465 if (!a->diag) { 466 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 467 } 468 /* if both shift schemes are chosen by user, only use info->shiftpd */ 469 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 470 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 471 PetscInt *aai = a->i; 472 ddiag = a->diag; 473 sctx.shift_top = 0; 474 for (i=0; i<n; i++) { 475 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 476 d = (a->a)[ddiag[i]]; 477 rs = -PetscAbsScalar(d) - PetscRealPart(d); 478 v = a->a+aai[i]; 479 nz = aai[i+1] - aai[i]; 480 for (j=0; j<nz; j++) 481 rs += PetscAbsScalar(v[j]); 482 if (rs>sctx.shift_top) sctx.shift_top = rs; 483 } 484 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 485 sctx.shift_top *= 1.1; 486 sctx.nshift_max = 5; 487 sctx.shift_lo = 0.; 488 sctx.shift_hi = 1.; 489 } 490 491 sctx.shift_amount = 0; 492 sctx.nshift = 0; 493 do { 494 sctx.lushift = PETSC_FALSE; 495 for (i=0; i<n; i++){ 496 nz = bi[i+1] - bi[i]; 497 bjtmp = bj + bi[i]; 498 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 499 500 /* load in initial (unfactored row) */ 501 nz = a->i[r[i]+1] - a->i[r[i]]; 502 ajtmp = a->j + a->i[r[i]]; 503 v = a->a + a->i[r[i]]; 504 for (j=0; j<nz; j++) { 505 rtmp[ics[ajtmp[j]]] = v[j]; 506 } 507 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 508 509 row = *bjtmp++; 510 while (row < i) { 511 pc = rtmp + row; 512 if (*pc != 0.0) { 513 pv = b->a + diag_offset[row]; 514 pj = b->j + diag_offset[row] + 1; 515 multiplier = *pc / *pv++; 516 *pc = multiplier; 517 nz = bi[row+1] - diag_offset[row] - 1; 518 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 519 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 520 } 521 row = *bjtmp++; 522 } 523 /* finished row so stick it into b->a */ 524 pv = b->a + bi[i] ; 525 pj = b->j + bi[i] ; 526 nz = bi[i+1] - bi[i]; 527 diag = diag_offset[i] - bi[i]; 528 rs = 0.0; 529 for (j=0; j<nz; j++) { 530 pv[j] = rtmps[pj[j]]; 531 if (j != diag) rs += PetscAbsScalar(pv[j]); 532 } 533 534 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 535 sctx.rs = rs; 536 sctx.pv = pv[diag]; 537 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 538 if (newshift == 1) break; 539 } 540 541 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 542 /* 543 * if no shift in this attempt & shifting & started shifting & can refine, 544 * then try lower shift 545 */ 546 sctx.shift_hi = info->shift_fraction; 547 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 548 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 549 sctx.lushift = PETSC_TRUE; 550 sctx.nshift++; 551 } 552 } while (sctx.lushift); 553 554 /* invert diagonal entries for simplier triangular solves */ 555 for (i=0; i<n; i++) { 556 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 557 } 558 559 ierr = PetscFree(rtmp);CHKERRQ(ierr); 560 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 562 C->factor = FACTOR_LU; 563 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 564 C->assembled = PETSC_TRUE; 565 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 566 if (sctx.nshift){ 567 if (info->shiftnz) { 568 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 569 } else if (info->shiftpd) { 570 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 571 } 572 } 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 578 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 579 { 580 PetscFunctionBegin; 581 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 582 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 583 PetscFunctionReturn(0); 584 } 585 586 587 /* ----------------------------------------------------------- */ 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatLUFactor_SeqAIJ" 590 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 591 { 592 PetscErrorCode ierr; 593 Mat C; 594 595 PetscFunctionBegin; 596 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 597 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 598 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 599 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 /* ----------------------------------------------------------- */ 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatSolve_SeqAIJ" 605 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 606 { 607 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 608 IS iscol = a->col,isrow = a->row; 609 PetscErrorCode ierr; 610 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 611 PetscInt nz,*rout,*cout; 612 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 613 614 PetscFunctionBegin; 615 if (!n) PetscFunctionReturn(0); 616 617 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 618 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 619 tmp = a->solve_work; 620 621 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 622 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 623 624 /* forward solve the lower triangular */ 625 tmp[0] = b[*r++]; 626 tmps = tmp; 627 for (i=1; i<n; i++) { 628 v = aa + ai[i] ; 629 vi = aj + ai[i] ; 630 nz = a->diag[i] - ai[i]; 631 sum = b[*r++]; 632 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 633 tmp[i] = sum; 634 } 635 636 /* backward solve the upper triangular */ 637 for (i=n-1; i>=0; i--){ 638 v = aa + a->diag[i] + 1; 639 vi = aj + a->diag[i] + 1; 640 nz = ai[i+1] - a->diag[i] - 1; 641 sum = tmp[i]; 642 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 643 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 644 } 645 646 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 647 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 648 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 649 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 650 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatMatSolve_SeqAIJ" 656 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 657 { 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 659 IS iscol = a->col,isrow = a->row; 660 PetscErrorCode ierr; 661 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 662 PetscInt nz,*rout,*cout,neq; 663 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 664 665 PetscFunctionBegin; 666 if (!n) PetscFunctionReturn(0); 667 668 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 669 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 670 671 tmp = a->solve_work; 672 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 673 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 674 675 for (neq=0; neq<n; neq++){ 676 /* forward solve the lower triangular */ 677 tmp[0] = b[r[0]]; 678 tmps = tmp; 679 for (i=1; i<n; i++) { 680 v = aa + ai[i] ; 681 vi = aj + ai[i] ; 682 nz = a->diag[i] - ai[i]; 683 sum = b[r[i]]; 684 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 685 tmp[i] = sum; 686 } 687 /* backward solve the upper triangular */ 688 for (i=n-1; i>=0; i--){ 689 v = aa + a->diag[i] + 1; 690 vi = aj + a->diag[i] + 1; 691 nz = ai[i+1] - a->diag[i] - 1; 692 sum = tmp[i]; 693 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 694 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 695 } 696 697 b += n; 698 x += n; 699 } 700 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 701 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 702 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 703 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 704 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 /* ----------------------------------------------------------- */ 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 711 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 712 { 713 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 714 PetscErrorCode ierr; 715 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 716 PetscScalar *x,*b,*aa = a->a; 717 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 718 PetscInt adiag_i,i,*vi,nz,ai_i; 719 PetscScalar *v,sum; 720 #endif 721 722 PetscFunctionBegin; 723 if (!n) PetscFunctionReturn(0); 724 725 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 726 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 727 728 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 729 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 730 #else 731 /* forward solve the lower triangular */ 732 x[0] = b[0]; 733 for (i=1; i<n; i++) { 734 ai_i = ai[i]; 735 v = aa + ai_i; 736 vi = aj + ai_i; 737 nz = adiag[i] - ai_i; 738 sum = b[i]; 739 while (nz--) sum -= *v++ * x[*vi++]; 740 x[i] = sum; 741 } 742 743 /* backward solve the upper triangular */ 744 for (i=n-1; i>=0; i--){ 745 adiag_i = adiag[i]; 746 v = aa + adiag_i + 1; 747 vi = aj + adiag_i + 1; 748 nz = ai[i+1] - adiag_i - 1; 749 sum = x[i]; 750 while (nz--) sum -= *v++ * x[*vi++]; 751 x[i] = sum*aa[adiag_i]; 752 } 753 #endif 754 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 755 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 756 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 762 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 763 { 764 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 765 IS iscol = a->col,isrow = a->row; 766 PetscErrorCode ierr; 767 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 768 PetscInt nz,*rout,*cout; 769 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 770 771 PetscFunctionBegin; 772 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 773 774 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 775 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 776 tmp = a->solve_work; 777 778 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 779 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 780 781 /* forward solve the lower triangular */ 782 tmp[0] = b[*r++]; 783 for (i=1; i<n; i++) { 784 v = aa + ai[i] ; 785 vi = aj + ai[i] ; 786 nz = a->diag[i] - ai[i]; 787 sum = b[*r++]; 788 while (nz--) sum -= *v++ * tmp[*vi++ ]; 789 tmp[i] = sum; 790 } 791 792 /* backward solve the upper triangular */ 793 for (i=n-1; i>=0; i--){ 794 v = aa + a->diag[i] + 1; 795 vi = aj + a->diag[i] + 1; 796 nz = ai[i+1] - a->diag[i] - 1; 797 sum = tmp[i]; 798 while (nz--) sum -= *v++ * tmp[*vi++ ]; 799 tmp[i] = sum*aa[a->diag[i]]; 800 x[*c--] += tmp[i]; 801 } 802 803 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 804 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 805 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 806 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 807 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 808 809 PetscFunctionReturn(0); 810 } 811 /* -------------------------------------------------------------------*/ 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 814 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 815 { 816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 817 IS iscol = a->col,isrow = a->row; 818 PetscErrorCode ierr; 819 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 820 PetscInt nz,*rout,*cout,*diag = a->diag; 821 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 822 823 PetscFunctionBegin; 824 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 tmp = a->solve_work; 827 828 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 829 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 830 831 /* copy the b into temp work space according to permutation */ 832 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 833 834 /* forward solve the U^T */ 835 for (i=0; i<n; i++) { 836 v = aa + diag[i] ; 837 vi = aj + diag[i] + 1; 838 nz = ai[i+1] - diag[i] - 1; 839 s1 = tmp[i]; 840 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 841 while (nz--) { 842 tmp[*vi++ ] -= (*v++)*s1; 843 } 844 tmp[i] = s1; 845 } 846 847 /* backward solve the L^T */ 848 for (i=n-1; i>=0; i--){ 849 v = aa + diag[i] - 1 ; 850 vi = aj + diag[i] - 1 ; 851 nz = diag[i] - ai[i]; 852 s1 = tmp[i]; 853 while (nz--) { 854 tmp[*vi-- ] -= (*v--)*s1; 855 } 856 } 857 858 /* copy tmp into x according to permutation */ 859 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 860 861 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 862 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 863 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 864 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 865 866 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 872 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 873 { 874 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 875 IS iscol = a->col,isrow = a->row; 876 PetscErrorCode ierr; 877 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 878 PetscInt nz,*rout,*cout,*diag = a->diag; 879 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 880 881 PetscFunctionBegin; 882 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 883 884 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 885 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 886 tmp = a->solve_work; 887 888 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 889 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 890 891 /* copy the b into temp work space according to permutation */ 892 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 893 894 /* forward solve the U^T */ 895 for (i=0; i<n; i++) { 896 v = aa + diag[i] ; 897 vi = aj + diag[i] + 1; 898 nz = ai[i+1] - diag[i] - 1; 899 tmp[i] *= *v++; 900 while (nz--) { 901 tmp[*vi++ ] -= (*v++)*tmp[i]; 902 } 903 } 904 905 /* backward solve the L^T */ 906 for (i=n-1; i>=0; i--){ 907 v = aa + diag[i] - 1 ; 908 vi = aj + diag[i] - 1 ; 909 nz = diag[i] - ai[i]; 910 while (nz--) { 911 tmp[*vi-- ] -= (*v--)*tmp[i]; 912 } 913 } 914 915 /* copy tmp into x according to permutation */ 916 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 917 918 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 919 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 920 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 921 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 922 923 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 /* ----------------------------------------------------------------*/ 927 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 931 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 932 { 933 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 934 IS isicol; 935 PetscErrorCode ierr; 936 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j; 937 PetscInt *bi,*cols,nnz,*cols_lvl; 938 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 939 PetscInt i,levels,diagonal_fill; 940 PetscTruth col_identity,row_identity; 941 PetscReal f; 942 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 943 PetscBT lnkbt; 944 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 945 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 946 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 947 948 PetscFunctionBegin; 949 f = info->fill; 950 levels = (PetscInt)info->levels; 951 diagonal_fill = (PetscInt)info->diagonal_fill; 952 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 953 954 /* special case that simply copies fill pattern */ 955 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 956 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 957 if (!levels && row_identity && col_identity) { 958 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 959 (*fact)->factor = FACTOR_LU; 960 (*fact)->info.factor_mallocs = 0; 961 (*fact)->info.fill_ratio_given = info->fill; 962 (*fact)->info.fill_ratio_needed = 1.0; 963 b = (Mat_SeqAIJ*)(*fact)->data; 964 if (!b->diag) { 965 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 966 } 967 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 968 b->row = isrow; 969 b->col = iscol; 970 b->icol = isicol; 971 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 972 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 973 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 974 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 979 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 980 981 /* get new row pointers */ 982 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 983 bi[0] = 0; 984 /* bdiag is location of diagonal in factor */ 985 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 986 bdiag[0] = 0; 987 988 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 989 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 990 991 /* create a linked list for storing column indices of the active row */ 992 nlnk = n + 1; 993 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 994 995 /* initial FreeSpace size is f*(ai[n]+1) */ 996 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 997 current_space = free_space; 998 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 999 current_space_lvl = free_space_lvl; 1000 1001 for (i=0; i<n; i++) { 1002 nzi = 0; 1003 /* copy current row into linked list */ 1004 nnz = ai[r[i]+1] - ai[r[i]]; 1005 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1006 cols = aj + ai[r[i]]; 1007 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1008 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1009 nzi += nlnk; 1010 1011 /* make sure diagonal entry is included */ 1012 if (diagonal_fill && lnk[i] == -1) { 1013 fm = n; 1014 while (lnk[fm] < i) fm = lnk[fm]; 1015 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1016 lnk[fm] = i; 1017 lnk_lvl[i] = 0; 1018 nzi++; dcount++; 1019 } 1020 1021 /* add pivot rows into the active row */ 1022 nzbd = 0; 1023 prow = lnk[n]; 1024 while (prow < i) { 1025 nnz = bdiag[prow]; 1026 cols = bj_ptr[prow] + nnz + 1; 1027 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1028 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1029 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1030 nzi += nlnk; 1031 prow = lnk[prow]; 1032 nzbd++; 1033 } 1034 bdiag[i] = nzbd; 1035 bi[i+1] = bi[i] + nzi; 1036 1037 /* if free space is not available, make more free space */ 1038 if (current_space->local_remaining<nzi) { 1039 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1040 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1041 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1042 reallocs++; 1043 } 1044 1045 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1046 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1047 bj_ptr[i] = current_space->array; 1048 bjlvl_ptr[i] = current_space_lvl->array; 1049 1050 /* make sure the active row i has diagonal entry */ 1051 if (*(bj_ptr[i]+bdiag[i]) != i) { 1052 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1053 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1054 } 1055 1056 current_space->array += nzi; 1057 current_space->local_used += nzi; 1058 current_space->local_remaining -= nzi; 1059 current_space_lvl->array += nzi; 1060 current_space_lvl->local_used += nzi; 1061 current_space_lvl->local_remaining -= nzi; 1062 } 1063 1064 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1065 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1066 1067 /* destroy list of free space and other temporary arrays */ 1068 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1069 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1070 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1071 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1072 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1073 1074 #if defined(PETSC_USE_INFO) 1075 { 1076 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1077 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1078 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1079 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1080 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1081 if (diagonal_fill) { 1082 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1083 } 1084 } 1085 #endif 1086 1087 /* put together the new matrix */ 1088 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1089 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1090 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1091 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1092 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1093 b = (Mat_SeqAIJ*)(*fact)->data; 1094 b->free_a = PETSC_TRUE; 1095 b->free_ij = PETSC_TRUE; 1096 b->singlemalloc = PETSC_FALSE; 1097 len = (bi[n] )*sizeof(PetscScalar); 1098 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1099 b->j = bj; 1100 b->i = bi; 1101 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1102 b->diag = bdiag; 1103 b->ilen = 0; 1104 b->imax = 0; 1105 b->row = isrow; 1106 b->col = iscol; 1107 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1108 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1109 b->icol = isicol; 1110 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1111 /* In b structure: Free imax, ilen, old a, old j. 1112 Allocate bdiag, solve_work, new a, new j */ 1113 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1114 b->maxnz = b->nz = bi[n] ; 1115 (*fact)->factor = FACTOR_LU; 1116 (*fact)->info.factor_mallocs = reallocs; 1117 (*fact)->info.fill_ratio_given = f; 1118 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1119 1120 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1121 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1122 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #include "src/mat/impls/sbaij/seq/sbaij.h" 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1129 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1130 { 1131 Mat C = *B; 1132 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1133 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1134 IS ip=b->row; 1135 PetscErrorCode ierr; 1136 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1137 PetscInt *ai=a->i,*aj=a->j; 1138 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1139 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1140 PetscReal zeropivot,rs,shiftnz; 1141 PetscReal shiftpd; 1142 ChShift_Ctx sctx; 1143 PetscInt newshift; 1144 1145 PetscFunctionBegin; 1146 shiftnz = info->shiftnz; 1147 shiftpd = info->shiftpd; 1148 zeropivot = info->zeropivot; 1149 1150 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1151 1152 /* initialization */ 1153 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1154 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1155 jl = il + mbs; 1156 rtmp = (MatScalar*)(jl + mbs); 1157 1158 sctx.shift_amount = 0; 1159 sctx.nshift = 0; 1160 do { 1161 sctx.chshift = PETSC_FALSE; 1162 for (i=0; i<mbs; i++) { 1163 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1164 } 1165 1166 for (k = 0; k<mbs; k++){ 1167 bval = ba + bi[k]; 1168 /* initialize k-th row by the perm[k]-th row of A */ 1169 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1170 for (j = jmin; j < jmax; j++){ 1171 col = rip[aj[j]]; 1172 if (col >= k){ /* only take upper triangular entry */ 1173 rtmp[col] = aa[j]; 1174 *bval++ = 0.0; /* for in-place factorization */ 1175 } 1176 } 1177 /* shift the diagonal of the matrix */ 1178 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1179 1180 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1181 dk = rtmp[k]; 1182 i = jl[k]; /* first row to be added to k_th row */ 1183 1184 while (i < k){ 1185 nexti = jl[i]; /* next row to be added to k_th row */ 1186 1187 /* compute multiplier, update diag(k) and U(i,k) */ 1188 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1189 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1190 dk += uikdi*ba[ili]; 1191 ba[ili] = uikdi; /* -U(i,k) */ 1192 1193 /* add multiple of row i to k-th row */ 1194 jmin = ili + 1; jmax = bi[i+1]; 1195 if (jmin < jmax){ 1196 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1197 /* update il and jl for row i */ 1198 il[i] = jmin; 1199 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1200 } 1201 i = nexti; 1202 } 1203 1204 /* shift the diagonals when zero pivot is detected */ 1205 /* compute rs=sum of abs(off-diagonal) */ 1206 rs = 0.0; 1207 jmin = bi[k]+1; 1208 nz = bi[k+1] - jmin; 1209 if (nz){ 1210 bcol = bj + jmin; 1211 while (nz--){ 1212 rs += PetscAbsScalar(rtmp[*bcol]); 1213 bcol++; 1214 } 1215 } 1216 1217 sctx.rs = rs; 1218 sctx.pv = dk; 1219 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1220 if (newshift == 1) break; 1221 1222 /* copy data into U(k,:) */ 1223 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1224 jmin = bi[k]+1; jmax = bi[k+1]; 1225 if (jmin < jmax) { 1226 for (j=jmin; j<jmax; j++){ 1227 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1228 } 1229 /* add the k-th row into il and jl */ 1230 il[k] = jmin; 1231 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1232 } 1233 } 1234 } while (sctx.chshift); 1235 ierr = PetscFree(il);CHKERRQ(ierr); 1236 1237 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1238 C->factor = FACTOR_CHOLESKY; 1239 C->assembled = PETSC_TRUE; 1240 C->preallocated = PETSC_TRUE; 1241 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1242 if (sctx.nshift){ 1243 if (shiftnz) { 1244 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1245 } else if (shiftpd) { 1246 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1247 } 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1254 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1255 { 1256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1257 Mat_SeqSBAIJ *b; 1258 Mat B; 1259 PetscErrorCode ierr; 1260 PetscTruth perm_identity; 1261 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1262 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1263 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1264 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1265 PetscReal fill=info->fill,levels=info->levels; 1266 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1267 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1268 PetscBT lnkbt; 1269 1270 PetscFunctionBegin; 1271 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1272 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1273 1274 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1275 ui[0] = 0; 1276 1277 /* special case that simply copies fill pattern */ 1278 if (!levels && perm_identity) { 1279 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1280 for (i=0; i<am; i++) { 1281 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1282 } 1283 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1284 cols = uj; 1285 for (i=0; i<am; i++) { 1286 aj = a->j + a->diag[i]; 1287 ncols = ui[i+1] - ui[i]; 1288 for (j=0; j<ncols; j++) *cols++ = *aj++; 1289 } 1290 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1291 /* initialization */ 1292 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1293 1294 /* jl: linked list for storing indices of the pivot rows 1295 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1296 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1297 il = jl + am; 1298 uj_ptr = (PetscInt**)(il + am); 1299 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1300 for (i=0; i<am; i++){ 1301 jl[i] = am; il[i] = 0; 1302 } 1303 1304 /* create and initialize a linked list for storing column indices of the active row k */ 1305 nlnk = am + 1; 1306 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1307 1308 /* initial FreeSpace size is fill*(ai[am]+1) */ 1309 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1310 current_space = free_space; 1311 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1312 current_space_lvl = free_space_lvl; 1313 1314 for (k=0; k<am; k++){ /* for each active row k */ 1315 /* initialize lnk by the column indices of row rip[k] of A */ 1316 nzk = 0; 1317 ncols = ai[rip[k]+1] - ai[rip[k]]; 1318 ncols_upper = 0; 1319 for (j=0; j<ncols; j++){ 1320 i = *(aj + ai[rip[k]] + j); 1321 if (rip[i] >= k){ /* only take upper triangular entry */ 1322 ajtmp[ncols_upper] = i; 1323 ncols_upper++; 1324 } 1325 } 1326 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1327 nzk += nlnk; 1328 1329 /* update lnk by computing fill-in for each pivot row to be merged in */ 1330 prow = jl[k]; /* 1st pivot row */ 1331 1332 while (prow < k){ 1333 nextprow = jl[prow]; 1334 1335 /* merge prow into k-th row */ 1336 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1337 jmax = ui[prow+1]; 1338 ncols = jmax-jmin; 1339 i = jmin - ui[prow]; 1340 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1341 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1342 j = *(uj - 1); 1343 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1344 nzk += nlnk; 1345 1346 /* update il and jl for prow */ 1347 if (jmin < jmax){ 1348 il[prow] = jmin; 1349 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1350 } 1351 prow = nextprow; 1352 } 1353 1354 /* if free space is not available, make more free space */ 1355 if (current_space->local_remaining<nzk) { 1356 i = am - k + 1; /* num of unfactored rows */ 1357 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1358 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1359 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1360 reallocs++; 1361 } 1362 1363 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1364 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1365 1366 /* add the k-th row into il and jl */ 1367 if (nzk > 1){ 1368 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1369 jl[k] = jl[i]; jl[i] = k; 1370 il[k] = ui[k] + 1; 1371 } 1372 uj_ptr[k] = current_space->array; 1373 uj_lvl_ptr[k] = current_space_lvl->array; 1374 1375 current_space->array += nzk; 1376 current_space->local_used += nzk; 1377 current_space->local_remaining -= nzk; 1378 1379 current_space_lvl->array += nzk; 1380 current_space_lvl->local_used += nzk; 1381 current_space_lvl->local_remaining -= nzk; 1382 1383 ui[k+1] = ui[k] + nzk; 1384 } 1385 1386 #if defined(PETSC_USE_INFO) 1387 if (ai[am] != 0) { 1388 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1389 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1390 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1391 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1392 } else { 1393 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1394 } 1395 #endif 1396 1397 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1398 ierr = PetscFree(jl);CHKERRQ(ierr); 1399 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1400 1401 /* destroy list of free space and other temporary array(s) */ 1402 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1403 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1404 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1405 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1406 1407 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1408 1409 /* put together the new matrix in MATSEQSBAIJ format */ 1410 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1411 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1412 B = *fact; 1413 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1414 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1415 1416 b = (Mat_SeqSBAIJ*)B->data; 1417 b->singlemalloc = PETSC_FALSE; 1418 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1419 b->j = uj; 1420 b->i = ui; 1421 b->diag = 0; 1422 b->ilen = 0; 1423 b->imax = 0; 1424 b->row = perm; 1425 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1426 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1427 b->icol = perm; 1428 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1429 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1430 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1431 b->maxnz = b->nz = ui[am]; 1432 b->free_a = PETSC_TRUE; 1433 b->free_ij = PETSC_TRUE; 1434 1435 B->factor = FACTOR_CHOLESKY; 1436 B->info.factor_mallocs = reallocs; 1437 B->info.fill_ratio_given = fill; 1438 if (ai[am] != 0) { 1439 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1440 } else { 1441 B->info.fill_ratio_needed = 0.0; 1442 } 1443 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1444 if (perm_identity){ 1445 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1453 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1454 { 1455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1456 Mat_SeqSBAIJ *b; 1457 Mat B; 1458 PetscErrorCode ierr; 1459 PetscTruth perm_identity; 1460 PetscReal fill = info->fill; 1461 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1462 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1463 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1464 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1465 PetscBT lnkbt; 1466 IS iperm; 1467 1468 PetscFunctionBegin; 1469 /* check whether perm is the identity mapping */ 1470 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1471 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1472 1473 if (!perm_identity){ 1474 /* check if perm is symmetric! */ 1475 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1476 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1477 for (i=0; i<am; i++) { 1478 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1479 } 1480 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1481 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1482 } 1483 1484 /* initialization */ 1485 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1486 ui[0] = 0; 1487 1488 /* jl: linked list for storing indices of the pivot rows 1489 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1490 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1491 il = jl + am; 1492 cols = il + am; 1493 ui_ptr = (PetscInt**)(cols + am); 1494 for (i=0; i<am; i++){ 1495 jl[i] = am; il[i] = 0; 1496 } 1497 1498 /* create and initialize a linked list for storing column indices of the active row k */ 1499 nlnk = am + 1; 1500 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1501 1502 /* initial FreeSpace size is fill*(ai[am]+1) */ 1503 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1504 current_space = free_space; 1505 1506 for (k=0; k<am; k++){ /* for each active row k */ 1507 /* initialize lnk by the column indices of row rip[k] of A */ 1508 nzk = 0; 1509 ncols = ai[rip[k]+1] - ai[rip[k]]; 1510 ncols_upper = 0; 1511 for (j=0; j<ncols; j++){ 1512 i = rip[*(aj + ai[rip[k]] + j)]; 1513 if (i >= k){ /* only take upper triangular entry */ 1514 cols[ncols_upper] = i; 1515 ncols_upper++; 1516 } 1517 } 1518 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1519 nzk += nlnk; 1520 1521 /* update lnk by computing fill-in for each pivot row to be merged in */ 1522 prow = jl[k]; /* 1st pivot row */ 1523 1524 while (prow < k){ 1525 nextprow = jl[prow]; 1526 /* merge prow into k-th row */ 1527 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1528 jmax = ui[prow+1]; 1529 ncols = jmax-jmin; 1530 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1531 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1532 nzk += nlnk; 1533 1534 /* update il and jl for prow */ 1535 if (jmin < jmax){ 1536 il[prow] = jmin; 1537 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1538 } 1539 prow = nextprow; 1540 } 1541 1542 /* if free space is not available, make more free space */ 1543 if (current_space->local_remaining<nzk) { 1544 i = am - k + 1; /* num of unfactored rows */ 1545 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1546 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1547 reallocs++; 1548 } 1549 1550 /* copy data into free space, then initialize lnk */ 1551 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1552 1553 /* add the k-th row into il and jl */ 1554 if (nzk-1 > 0){ 1555 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1556 jl[k] = jl[i]; jl[i] = k; 1557 il[k] = ui[k] + 1; 1558 } 1559 ui_ptr[k] = current_space->array; 1560 current_space->array += nzk; 1561 current_space->local_used += nzk; 1562 current_space->local_remaining -= nzk; 1563 1564 ui[k+1] = ui[k] + nzk; 1565 } 1566 1567 #if defined(PETSC_USE_INFO) 1568 if (ai[am] != 0) { 1569 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1570 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1571 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1572 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1573 } else { 1574 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1575 } 1576 #endif 1577 1578 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1579 ierr = PetscFree(jl);CHKERRQ(ierr); 1580 1581 /* destroy list of free space and other temporary array(s) */ 1582 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1583 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1584 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1585 1586 /* put together the new matrix in MATSEQSBAIJ format */ 1587 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1588 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1589 B = *fact; 1590 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1591 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1592 1593 b = (Mat_SeqSBAIJ*)B->data; 1594 b->singlemalloc = PETSC_FALSE; 1595 b->free_a = PETSC_TRUE; 1596 b->free_ij = PETSC_TRUE; 1597 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1598 b->j = uj; 1599 b->i = ui; 1600 b->diag = 0; 1601 b->ilen = 0; 1602 b->imax = 0; 1603 b->row = perm; 1604 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1605 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1606 b->icol = perm; 1607 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1608 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1609 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1610 b->maxnz = b->nz = ui[am]; 1611 1612 B->factor = FACTOR_CHOLESKY; 1613 B->info.factor_mallocs = reallocs; 1614 B->info.fill_ratio_given = fill; 1615 if (ai[am] != 0) { 1616 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1617 } else { 1618 B->info.fill_ratio_needed = 0.0; 1619 } 1620 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1621 if (perm_identity){ 1622 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1623 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627