1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap->n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = MAT_FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 EXTERN_C_BEGIN 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 261 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 262 { 263 PetscFunctionBegin; 264 *flg = PETSC_TRUE; 265 PetscFunctionReturn(0); 266 } 267 EXTERN_C_END 268 269 EXTERN_C_BEGIN 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 272 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 273 { 274 PetscInt n = A->rmap->n; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 279 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 280 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 281 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 282 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 283 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 284 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 285 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 286 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 287 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 288 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 289 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 290 (*B)->factor = ftype; 291 PetscFunctionReturn(0); 292 } 293 EXTERN_C_END 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 297 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 298 { 299 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 300 IS isicol; 301 PetscErrorCode ierr; 302 PetscInt *r,*ic,i,n=A->rmap->n,*ai=a->i,*aj=a->j; 303 PetscInt *bi,*bj,*ajtmp; 304 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 305 PetscReal f; 306 PetscInt nlnk,*lnk,k,**bi_ptr; 307 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 308 PetscBT lnkbt; 309 310 PetscFunctionBegin; 311 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 312 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 313 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 314 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 315 316 /* get new row pointers */ 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 318 bi[0] = 0; 319 320 /* bdiag is location of diagonal in factor */ 321 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 322 bdiag[0] = 0; 323 324 /* linked list for storing column indices of the active row */ 325 nlnk = n + 1; 326 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327 328 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 329 330 /* initial FreeSpace size is f*(ai[n]+1) */ 331 f = info->fill; 332 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 333 current_space = free_space; 334 335 for (i=0; i<n; i++) { 336 /* copy previous fill into linked list */ 337 nzi = 0; 338 nnz = ai[r[i]+1] - ai[r[i]]; 339 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 340 ajtmp = aj + ai[r[i]]; 341 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 342 nzi += nlnk; 343 344 /* add pivot rows into linked list */ 345 row = lnk[n]; 346 while (row < i) { 347 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 348 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 349 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 350 nzi += nlnk; 351 row = lnk[row]; 352 } 353 bi[i+1] = bi[i] + nzi; 354 im[i] = nzi; 355 356 /* mark bdiag */ 357 nzbd = 0; 358 nnz = nzi; 359 k = lnk[n]; 360 while (nnz-- && k < i){ 361 nzbd++; 362 k = lnk[k]; 363 } 364 bdiag[i] = bi[i] + nzbd; 365 366 /* if free space is not available, make more free space */ 367 if (current_space->local_remaining<nzi) { 368 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 369 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 370 reallocs++; 371 } 372 373 /* copy data into free space, then initialize lnk */ 374 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 375 bi_ptr[i] = current_space->array; 376 current_space->array += nzi; 377 current_space->local_used += nzi; 378 current_space->local_remaining -= nzi; 379 } 380 #if defined(PETSC_USE_INFO) 381 if (ai[n] != 0) { 382 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 383 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 384 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 385 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 386 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 387 } else { 388 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 389 } 390 #endif 391 392 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 393 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 394 395 /* destroy list of free space and other temporary array(s) */ 396 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 397 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 398 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 399 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 400 401 /* put together the new matrix */ 402 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 403 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 404 b = (Mat_SeqAIJ*)(*B)->data; 405 b->free_a = PETSC_TRUE; 406 b->free_ij = PETSC_TRUE; 407 b->singlemalloc = PETSC_FALSE; 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 409 b->j = bj; 410 b->i = bi; 411 b->diag = bdiag; 412 b->ilen = 0; 413 b->imax = 0; 414 b->row = isrow; 415 b->col = iscol; 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 420 421 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 422 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 423 b->maxnz = b->nz = bi[n] ; 424 425 (*B)->factor = MAT_FACTOR_LU; 426 (*B)->info.factor_mallocs = reallocs; 427 (*B)->info.fill_ratio_given = f; 428 429 if (ai[n] != 0) { 430 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 431 } else { 432 (*B)->info.fill_ratio_needed = 0.0; 433 } 434 (*B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 (*B)->ops->solve = MatSolve_SeqAIJ; 436 (*B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 437 /* switch to inodes if appropriate */ 438 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscTruth flg; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 467 468 /* ----------------------------------------------------------- */ 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 472 { 473 Mat C=*B; 474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 475 IS isrow = b->row,isicol = b->icol; 476 PetscErrorCode ierr; 477 PetscInt *r,*ic,i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 478 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 479 PetscInt *diag_offset = b->diag,diag,*pj; 480 PetscScalar *rtmp,*pc,multiplier,*rtmps; 481 MatScalar *v,*pv; 482 PetscScalar d; 483 PetscReal rs; 484 LUShift_Ctx sctx; 485 PetscInt newshift,*ddiag; 486 487 PetscFunctionBegin; 488 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 489 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 490 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 491 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 492 rtmps = rtmp; ics = ic; 493 494 sctx.shift_top = 0; 495 sctx.nshift_max = 0; 496 sctx.shift_lo = 0; 497 sctx.shift_hi = 0; 498 499 /* if both shift schemes are chosen by user, only use info->shiftpd */ 500 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 501 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 502 PetscInt *aai = a->i; 503 ddiag = a->diag; 504 sctx.shift_top = 0; 505 for (i=0; i<n; i++) { 506 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 507 d = (a->a)[ddiag[i]]; 508 rs = -PetscAbsScalar(d) - PetscRealPart(d); 509 v = a->a+aai[i]; 510 nz = aai[i+1] - aai[i]; 511 for (j=0; j<nz; j++) 512 rs += PetscAbsScalar(v[j]); 513 if (rs>sctx.shift_top) sctx.shift_top = rs; 514 } 515 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 516 sctx.shift_top *= 1.1; 517 sctx.nshift_max = 5; 518 sctx.shift_lo = 0.; 519 sctx.shift_hi = 1.; 520 } 521 522 sctx.shift_amount = 0; 523 sctx.nshift = 0; 524 do { 525 sctx.lushift = PETSC_FALSE; 526 for (i=0; i<n; i++){ 527 nz = bi[i+1] - bi[i]; 528 bjtmp = bj + bi[i]; 529 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 530 531 /* load in initial (unfactored row) */ 532 nz = a->i[r[i]+1] - a->i[r[i]]; 533 ajtmp = a->j + a->i[r[i]]; 534 v = a->a + a->i[r[i]]; 535 for (j=0; j<nz; j++) { 536 rtmp[ics[ajtmp[j]]] = v[j]; 537 } 538 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 539 540 row = *bjtmp++; 541 while (row < i) { 542 pc = rtmp + row; 543 if (*pc != 0.0) { 544 pv = b->a + diag_offset[row]; 545 pj = b->j + diag_offset[row] + 1; 546 multiplier = *pc / *pv++; 547 *pc = multiplier; 548 nz = bi[row+1] - diag_offset[row] - 1; 549 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 550 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 551 } 552 row = *bjtmp++; 553 } 554 /* finished row so stick it into b->a */ 555 pv = b->a + bi[i] ; 556 pj = b->j + bi[i] ; 557 nz = bi[i+1] - bi[i]; 558 diag = diag_offset[i] - bi[i]; 559 rs = 0.0; 560 for (j=0; j<nz; j++) { 561 pv[j] = rtmps[pj[j]]; 562 if (j != diag) rs += PetscAbsScalar(pv[j]); 563 } 564 565 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 566 sctx.rs = rs; 567 sctx.pv = pv[diag]; 568 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 569 if (newshift == 1) break; 570 } 571 572 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 573 /* 574 * if no shift in this attempt & shifting & started shifting & can refine, 575 * then try lower shift 576 */ 577 sctx.shift_hi = info->shift_fraction; 578 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 579 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 580 sctx.lushift = PETSC_TRUE; 581 sctx.nshift++; 582 } 583 } while (sctx.lushift); 584 585 /* invert diagonal entries for simplier triangular solves */ 586 for (i=0; i<n; i++) { 587 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 588 } 589 ierr = PetscFree(rtmp);CHKERRQ(ierr); 590 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 592 if (b->inode.use) { 593 C->ops->solve = MatSolve_Inode; 594 } else { 595 C->ops->solve = MatSolve_SeqAIJ; 596 } 597 C->ops->solveadd = MatSolveAdd_SeqAIJ; 598 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 599 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 600 C->ops->matsolve = MatMatSolve_SeqAIJ; 601 C->assembled = PETSC_TRUE; 602 C->preallocated = PETSC_TRUE; 603 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 604 if (sctx.nshift){ 605 if (info->shiftnz) { 606 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 607 } else if (info->shiftpd) { 608 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 609 } 610 } 611 PetscFunctionReturn(0); 612 } 613 614 /* 615 This routine implements inplace ILU(0) with row or/and column permutations. 616 Input: 617 A - original matrix 618 Output; 619 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 620 a->j (col index) is permuted by the inverse of colperm, then sorted 621 a->a reordered accordingly with a->j 622 a->diag (ptr to diagonal elements) is updated. 623 */ 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 627 { 628 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 629 IS isrow = a->row,isicol = a->icol; 630 PetscErrorCode ierr; 631 PetscInt *r,*ic,i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 632 PetscInt *ajtmp,nz,row,*ics; 633 PetscInt *diag = a->diag,nbdiag,*pj; 634 PetscScalar *rtmp,*pc,multiplier,d; 635 MatScalar *v,*pv; 636 PetscReal rs; 637 LUShift_Ctx sctx; 638 PetscInt newshift; 639 640 PetscFunctionBegin; 641 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 642 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 643 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 644 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 645 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 646 ics = ic; 647 648 sctx.shift_top = 0; 649 sctx.nshift_max = 0; 650 sctx.shift_lo = 0; 651 sctx.shift_hi = 0; 652 653 /* if both shift schemes are chosen by user, only use info->shiftpd */ 654 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 655 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 656 sctx.shift_top = 0; 657 for (i=0; i<n; i++) { 658 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 659 d = (a->a)[diag[i]]; 660 rs = -PetscAbsScalar(d) - PetscRealPart(d); 661 v = a->a+ai[i]; 662 nz = ai[i+1] - ai[i]; 663 for (j=0; j<nz; j++) 664 rs += PetscAbsScalar(v[j]); 665 if (rs>sctx.shift_top) sctx.shift_top = rs; 666 } 667 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 668 sctx.shift_top *= 1.1; 669 sctx.nshift_max = 5; 670 sctx.shift_lo = 0.; 671 sctx.shift_hi = 1.; 672 } 673 674 sctx.shift_amount = 0; 675 sctx.nshift = 0; 676 do { 677 sctx.lushift = PETSC_FALSE; 678 for (i=0; i<n; i++){ 679 /* load in initial unfactored row */ 680 nz = ai[r[i]+1] - ai[r[i]]; 681 ajtmp = aj + ai[r[i]]; 682 v = a->a + ai[r[i]]; 683 /* sort permuted ajtmp and values v accordingly */ 684 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 685 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 686 687 diag[r[i]] = ai[r[i]]; 688 for (j=0; j<nz; j++) { 689 rtmp[ajtmp[j]] = v[j]; 690 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 691 } 692 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 693 694 row = *ajtmp++; 695 while (row < i) { 696 pc = rtmp + row; 697 if (*pc != 0.0) { 698 pv = a->a + diag[r[row]]; 699 pj = aj + diag[r[row]] + 1; 700 701 multiplier = *pc / *pv++; 702 *pc = multiplier; 703 nz = ai[r[row]+1] - diag[r[row]] - 1; 704 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 705 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 706 } 707 row = *ajtmp++; 708 } 709 /* finished row so overwrite it onto a->a */ 710 pv = a->a + ai[r[i]] ; 711 pj = aj + ai[r[i]] ; 712 nz = ai[r[i]+1] - ai[r[i]]; 713 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 714 715 rs = 0.0; 716 for (j=0; j<nz; j++) { 717 pv[j] = rtmp[pj[j]]; 718 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 719 } 720 721 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 722 sctx.rs = rs; 723 sctx.pv = pv[nbdiag]; 724 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 725 if (newshift == 1) break; 726 } 727 728 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 729 /* 730 * if no shift in this attempt & shifting & started shifting & can refine, 731 * then try lower shift 732 */ 733 sctx.shift_hi = info->shift_fraction; 734 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 735 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 736 sctx.lushift = PETSC_TRUE; 737 sctx.nshift++; 738 } 739 } while (sctx.lushift); 740 741 /* invert diagonal entries for simplier triangular solves */ 742 for (i=0; i<n; i++) { 743 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 744 } 745 746 ierr = PetscFree(rtmp);CHKERRQ(ierr); 747 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 748 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 749 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 750 A->ops->solveadd = MatSolveAdd_SeqAIJ; 751 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 752 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 753 A->assembled = PETSC_TRUE; 754 A->preallocated = PETSC_TRUE; 755 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 756 if (sctx.nshift){ 757 if (info->shiftnz) { 758 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 759 } else if (info->shiftpd) { 760 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 761 } 762 } 763 PetscFunctionReturn(0); 764 } 765 766 /* ----------------------------------------------------------- */ 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatLUFactor_SeqAIJ" 769 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 770 { 771 PetscErrorCode ierr; 772 Mat C; 773 774 PetscFunctionBegin; 775 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 776 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 777 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 778 A->ops->solve = C->ops->solve; 779 A->ops->solvetranspose = C->ops->solvetranspose; 780 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 781 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 782 PetscFunctionReturn(0); 783 } 784 /* ----------------------------------------------------------- */ 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatSolve_SeqAIJ" 787 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 788 { 789 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 790 IS iscol = a->col,isrow = a->row; 791 PetscErrorCode ierr; 792 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 793 PetscInt nz,*rout,*cout; 794 PetscScalar *x,*tmp,*tmps,sum; 795 const PetscScalar *b; 796 const MatScalar *aa = a->a,*v; 797 798 PetscFunctionBegin; 799 if (!n) PetscFunctionReturn(0); 800 801 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 802 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 803 tmp = a->solve_work; 804 805 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 806 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 807 808 /* forward solve the lower triangular */ 809 tmp[0] = b[*r++]; 810 tmps = tmp; 811 for (i=1; i<n; i++) { 812 v = aa + ai[i] ; 813 vi = aj + ai[i] ; 814 nz = a->diag[i] - ai[i]; 815 sum = b[*r++]; 816 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 817 tmp[i] = sum; 818 } 819 820 /* backward solve the upper triangular */ 821 for (i=n-1; i>=0; i--){ 822 v = aa + a->diag[i] + 1; 823 vi = aj + a->diag[i] + 1; 824 nz = ai[i+1] - a->diag[i] - 1; 825 sum = tmp[i]; 826 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 827 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 828 } 829 830 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 831 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 832 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 833 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 834 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMatSolve_SeqAIJ" 840 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 841 { 842 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 843 IS iscol = a->col,isrow = a->row; 844 PetscErrorCode ierr; 845 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 846 PetscInt nz,*rout,*cout,neq; 847 PetscScalar *x,*b,*tmp,*tmps,sum; 848 const MatScalar *aa = a->a,*v; 849 PetscTruth bisdense,xisdense; 850 851 PetscFunctionBegin; 852 if (!n) PetscFunctionReturn(0); 853 854 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 855 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 856 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 857 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 858 859 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 860 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 861 862 tmp = a->solve_work; 863 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 864 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 865 866 for (neq=0; neq<B->cmap->n; neq++){ 867 /* forward solve the lower triangular */ 868 tmp[0] = b[r[0]]; 869 tmps = tmp; 870 for (i=1; i<n; i++) { 871 v = aa + ai[i] ; 872 vi = aj + ai[i] ; 873 nz = a->diag[i] - ai[i]; 874 sum = b[r[i]]; 875 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 876 tmp[i] = sum; 877 } 878 /* backward solve the upper triangular */ 879 for (i=n-1; i>=0; i--){ 880 v = aa + a->diag[i] + 1; 881 vi = aj + a->diag[i] + 1; 882 nz = ai[i+1] - a->diag[i] - 1; 883 sum = tmp[i]; 884 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 885 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 886 } 887 888 b += n; 889 x += n; 890 } 891 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 892 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 893 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 894 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 895 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } 898 899 #undef __FUNCT__ 900 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 901 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 902 { 903 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 904 IS iscol = a->col,isrow = a->row; 905 PetscErrorCode ierr; 906 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 907 PetscInt nz,*rout,*cout,row; 908 PetscScalar *x,*b,*tmp,*tmps,sum; 909 const MatScalar *aa = a->a,*v; 910 911 PetscFunctionBegin; 912 if (!n) PetscFunctionReturn(0); 913 914 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 915 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 916 tmp = a->solve_work; 917 918 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 919 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 920 921 /* forward solve the lower triangular */ 922 tmp[0] = b[*r++]; 923 tmps = tmp; 924 for (row=1; row<n; row++) { 925 i = rout[row]; /* permuted row */ 926 v = aa + ai[i] ; 927 vi = aj + ai[i] ; 928 nz = a->diag[i] - ai[i]; 929 sum = b[*r++]; 930 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 931 tmp[row] = sum; 932 } 933 934 /* backward solve the upper triangular */ 935 for (row=n-1; row>=0; row--){ 936 i = rout[row]; /* permuted row */ 937 v = aa + a->diag[i] + 1; 938 vi = aj + a->diag[i] + 1; 939 nz = ai[i+1] - a->diag[i] - 1; 940 sum = tmp[row]; 941 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 942 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 943 } 944 945 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 946 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 /* ----------------------------------------------------------- */ 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 956 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 957 { 958 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 959 PetscErrorCode ierr; 960 PetscInt n = A->rmap->n; 961 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 962 PetscScalar *x; 963 const PetscScalar *b; 964 const MatScalar *aa = a->a; 965 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 966 PetscInt adiag_i,i,nz,ai_i; 967 const MatScalar *v; 968 PetscScalar sum; 969 #endif 970 971 PetscFunctionBegin; 972 if (!n) PetscFunctionReturn(0); 973 974 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 975 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 976 977 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 978 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 979 #else 980 /* forward solve the lower triangular */ 981 x[0] = b[0]; 982 for (i=1; i<n; i++) { 983 ai_i = ai[i]; 984 v = aa + ai_i; 985 vi = aj + ai_i; 986 nz = adiag[i] - ai_i; 987 sum = b[i]; 988 while (nz--) sum -= *v++ * x[*vi++]; 989 x[i] = sum; 990 } 991 992 /* backward solve the upper triangular */ 993 for (i=n-1; i>=0; i--){ 994 adiag_i = adiag[i]; 995 v = aa + adiag_i + 1; 996 vi = aj + adiag_i + 1; 997 nz = ai[i+1] - adiag_i - 1; 998 sum = x[i]; 999 while (nz--) sum -= *v++ * x[*vi++]; 1000 x[i] = sum*aa[adiag_i]; 1001 } 1002 #endif 1003 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1005 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1011 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1012 { 1013 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1014 IS iscol = a->col,isrow = a->row; 1015 PetscErrorCode ierr; 1016 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1017 PetscInt nz,*rout,*cout; 1018 PetscScalar *x,*b,*tmp,sum; 1019 const MatScalar *aa = a->a,*v; 1020 1021 PetscFunctionBegin; 1022 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1023 1024 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1025 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1026 tmp = a->solve_work; 1027 1028 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1029 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1030 1031 /* forward solve the lower triangular */ 1032 tmp[0] = b[*r++]; 1033 for (i=1; i<n; i++) { 1034 v = aa + ai[i] ; 1035 vi = aj + ai[i] ; 1036 nz = a->diag[i] - ai[i]; 1037 sum = b[*r++]; 1038 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1039 tmp[i] = sum; 1040 } 1041 1042 /* backward solve the upper triangular */ 1043 for (i=n-1; i>=0; i--){ 1044 v = aa + a->diag[i] + 1; 1045 vi = aj + a->diag[i] + 1; 1046 nz = ai[i+1] - a->diag[i] - 1; 1047 sum = tmp[i]; 1048 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1049 tmp[i] = sum*aa[a->diag[i]]; 1050 x[*c--] += tmp[i]; 1051 } 1052 1053 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1054 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1057 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1058 1059 PetscFunctionReturn(0); 1060 } 1061 /* -------------------------------------------------------------------*/ 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1064 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1065 { 1066 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1067 IS iscol = a->col,isrow = a->row; 1068 PetscErrorCode ierr; 1069 PetscInt *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1070 PetscInt nz,*rout,*cout,*diag = a->diag; 1071 PetscScalar *x,*b,*tmp,s1; 1072 const MatScalar *aa = a->a,*v; 1073 1074 PetscFunctionBegin; 1075 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1076 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1077 tmp = a->solve_work; 1078 1079 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1080 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1081 1082 /* copy the b into temp work space according to permutation */ 1083 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1084 1085 /* forward solve the U^T */ 1086 for (i=0; i<n; i++) { 1087 v = aa + diag[i] ; 1088 vi = aj + diag[i] + 1; 1089 nz = ai[i+1] - diag[i] - 1; 1090 s1 = tmp[i]; 1091 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1092 while (nz--) { 1093 tmp[*vi++ ] -= (*v++)*s1; 1094 } 1095 tmp[i] = s1; 1096 } 1097 1098 /* backward solve the L^T */ 1099 for (i=n-1; i>=0; i--){ 1100 v = aa + diag[i] - 1 ; 1101 vi = aj + diag[i] - 1 ; 1102 nz = diag[i] - ai[i]; 1103 s1 = tmp[i]; 1104 while (nz--) { 1105 tmp[*vi-- ] -= (*v--)*s1; 1106 } 1107 } 1108 1109 /* copy tmp into x according to permutation */ 1110 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1111 1112 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1113 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1115 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1116 1117 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1123 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1124 { 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1126 IS iscol = a->col,isrow = a->row; 1127 PetscErrorCode ierr; 1128 PetscInt *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1129 PetscInt nz,*rout,*cout,*diag = a->diag; 1130 PetscScalar *x,*b,*tmp; 1131 const MatScalar *aa = a->a,*v; 1132 1133 PetscFunctionBegin; 1134 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1135 1136 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1137 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1138 tmp = a->solve_work; 1139 1140 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1141 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1142 1143 /* copy the b into temp work space according to permutation */ 1144 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1145 1146 /* forward solve the U^T */ 1147 for (i=0; i<n; i++) { 1148 v = aa + diag[i] ; 1149 vi = aj + diag[i] + 1; 1150 nz = ai[i+1] - diag[i] - 1; 1151 tmp[i] *= *v++; 1152 while (nz--) { 1153 tmp[*vi++ ] -= (*v++)*tmp[i]; 1154 } 1155 } 1156 1157 /* backward solve the L^T */ 1158 for (i=n-1; i>=0; i--){ 1159 v = aa + diag[i] - 1 ; 1160 vi = aj + diag[i] - 1 ; 1161 nz = diag[i] - ai[i]; 1162 while (nz--) { 1163 tmp[*vi-- ] -= (*v--)*tmp[i]; 1164 } 1165 } 1166 1167 /* copy tmp into x according to permutation */ 1168 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1169 1170 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1171 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1172 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1173 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1174 1175 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 /* ----------------------------------------------------------------*/ 1179 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1180 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,MatDuplicateOption,Mat*); 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1184 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1185 { 1186 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1187 IS isicol; 1188 PetscErrorCode ierr; 1189 PetscInt *r,*ic,n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1190 PetscInt *bi,*cols,nnz,*cols_lvl; 1191 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1192 PetscInt i,levels,diagonal_fill; 1193 PetscTruth col_identity,row_identity; 1194 PetscReal f; 1195 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1196 PetscBT lnkbt; 1197 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1198 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1199 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1200 PetscTruth missing; 1201 1202 PetscFunctionBegin; 1203 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1204 f = info->fill; 1205 levels = (PetscInt)info->levels; 1206 diagonal_fill = (PetscInt)info->diagonal_fill; 1207 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1208 1209 /* special case that simply copies fill pattern */ 1210 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1211 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1212 if (!levels && row_identity && col_identity) { 1213 ierr = MatDuplicateNoCreate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1214 (*fact)->factor = MAT_FACTOR_LU; 1215 (*fact)->info.factor_mallocs = 0; 1216 (*fact)->info.fill_ratio_given = info->fill; 1217 (*fact)->info.fill_ratio_needed = 1.0; 1218 b = (Mat_SeqAIJ*)(*fact)->data; 1219 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1220 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1221 b->row = isrow; 1222 b->col = iscol; 1223 b->icol = isicol; 1224 ierr = PetscMalloc(((*fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1225 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1226 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1227 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1228 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1229 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1234 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1235 1236 /* get new row pointers */ 1237 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1238 bi[0] = 0; 1239 /* bdiag is location of diagonal in factor */ 1240 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1241 bdiag[0] = 0; 1242 1243 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1244 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1245 1246 /* create a linked list for storing column indices of the active row */ 1247 nlnk = n + 1; 1248 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1249 1250 /* initial FreeSpace size is f*(ai[n]+1) */ 1251 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1252 current_space = free_space; 1253 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1254 current_space_lvl = free_space_lvl; 1255 1256 for (i=0; i<n; i++) { 1257 nzi = 0; 1258 /* copy current row into linked list */ 1259 nnz = ai[r[i]+1] - ai[r[i]]; 1260 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1261 cols = aj + ai[r[i]]; 1262 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1263 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1264 nzi += nlnk; 1265 1266 /* make sure diagonal entry is included */ 1267 if (diagonal_fill && lnk[i] == -1) { 1268 fm = n; 1269 while (lnk[fm] < i) fm = lnk[fm]; 1270 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1271 lnk[fm] = i; 1272 lnk_lvl[i] = 0; 1273 nzi++; dcount++; 1274 } 1275 1276 /* add pivot rows into the active row */ 1277 nzbd = 0; 1278 prow = lnk[n]; 1279 while (prow < i) { 1280 nnz = bdiag[prow]; 1281 cols = bj_ptr[prow] + nnz + 1; 1282 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1283 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1284 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1285 nzi += nlnk; 1286 prow = lnk[prow]; 1287 nzbd++; 1288 } 1289 bdiag[i] = nzbd; 1290 bi[i+1] = bi[i] + nzi; 1291 1292 /* if free space is not available, make more free space */ 1293 if (current_space->local_remaining<nzi) { 1294 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1295 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1296 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1297 reallocs++; 1298 } 1299 1300 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1301 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1302 bj_ptr[i] = current_space->array; 1303 bjlvl_ptr[i] = current_space_lvl->array; 1304 1305 /* make sure the active row i has diagonal entry */ 1306 if (*(bj_ptr[i]+bdiag[i]) != i) { 1307 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1308 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1309 } 1310 1311 current_space->array += nzi; 1312 current_space->local_used += nzi; 1313 current_space->local_remaining -= nzi; 1314 current_space_lvl->array += nzi; 1315 current_space_lvl->local_used += nzi; 1316 current_space_lvl->local_remaining -= nzi; 1317 } 1318 1319 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1320 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1321 1322 /* destroy list of free space and other temporary arrays */ 1323 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1324 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1325 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1326 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1327 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1328 1329 #if defined(PETSC_USE_INFO) 1330 { 1331 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1332 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1333 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1334 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1335 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1336 if (diagonal_fill) { 1337 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1338 } 1339 } 1340 #endif 1341 1342 /* put together the new matrix */ 1343 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1344 b = (Mat_SeqAIJ*)(*fact)->data; 1345 b->free_a = PETSC_TRUE; 1346 b->free_ij = PETSC_TRUE; 1347 b->singlemalloc = PETSC_FALSE; 1348 len = (bi[n] )*sizeof(PetscScalar); 1349 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1350 b->j = bj; 1351 b->i = bi; 1352 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1353 b->diag = bdiag; 1354 b->ilen = 0; 1355 b->imax = 0; 1356 b->row = isrow; 1357 b->col = iscol; 1358 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1359 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1360 b->icol = isicol; 1361 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1362 /* In b structure: Free imax, ilen, old a, old j. 1363 Allocate bdiag, solve_work, new a, new j */ 1364 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1365 b->maxnz = b->nz = bi[n] ; 1366 (*fact)->factor = MAT_FACTOR_LU; 1367 (*fact)->info.factor_mallocs = reallocs; 1368 (*fact)->info.fill_ratio_given = f; 1369 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1370 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1371 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #include "src/mat/impls/sbaij/seq/sbaij.h" 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1378 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1379 { 1380 Mat C = *B; 1381 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1382 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1383 IS ip=b->row,iip = b->icol; 1384 PetscErrorCode ierr; 1385 PetscInt *rip,*riip,i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1386 PetscInt *ai=a->i,*aj=a->j; 1387 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1388 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1389 PetscReal zeropivot,rs,shiftnz; 1390 PetscReal shiftpd; 1391 ChShift_Ctx sctx; 1392 PetscInt newshift; 1393 PetscTruth perm_identity; 1394 1395 PetscFunctionBegin; 1396 1397 shiftnz = info->shiftnz; 1398 shiftpd = info->shiftpd; 1399 zeropivot = info->zeropivot; 1400 1401 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1402 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1403 1404 /* initialization */ 1405 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1406 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1407 jl = il + mbs; 1408 rtmp = (MatScalar*)(jl + mbs); 1409 1410 sctx.shift_amount = 0; 1411 sctx.nshift = 0; 1412 do { 1413 sctx.chshift = PETSC_FALSE; 1414 for (i=0; i<mbs; i++) { 1415 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1416 } 1417 1418 for (k = 0; k<mbs; k++){ 1419 bval = ba + bi[k]; 1420 /* initialize k-th row by the perm[k]-th row of A */ 1421 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1422 for (j = jmin; j < jmax; j++){ 1423 col = riip[aj[j]]; 1424 if (col >= k){ /* only take upper triangular entry */ 1425 rtmp[col] = aa[j]; 1426 *bval++ = 0.0; /* for in-place factorization */ 1427 } 1428 } 1429 /* shift the diagonal of the matrix */ 1430 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1431 1432 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1433 dk = rtmp[k]; 1434 i = jl[k]; /* first row to be added to k_th row */ 1435 1436 while (i < k){ 1437 nexti = jl[i]; /* next row to be added to k_th row */ 1438 1439 /* compute multiplier, update diag(k) and U(i,k) */ 1440 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1441 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1442 dk += uikdi*ba[ili]; 1443 ba[ili] = uikdi; /* -U(i,k) */ 1444 1445 /* add multiple of row i to k-th row */ 1446 jmin = ili + 1; jmax = bi[i+1]; 1447 if (jmin < jmax){ 1448 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1449 /* update il and jl for row i */ 1450 il[i] = jmin; 1451 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1452 } 1453 i = nexti; 1454 } 1455 1456 /* shift the diagonals when zero pivot is detected */ 1457 /* compute rs=sum of abs(off-diagonal) */ 1458 rs = 0.0; 1459 jmin = bi[k]+1; 1460 nz = bi[k+1] - jmin; 1461 bcol = bj + jmin; 1462 while (nz--){ 1463 rs += PetscAbsScalar(rtmp[*bcol]); 1464 bcol++; 1465 } 1466 1467 sctx.rs = rs; 1468 sctx.pv = dk; 1469 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1470 1471 if (newshift == 1) { 1472 if (!sctx.shift_amount) { 1473 sctx.shift_amount = 1e-5; 1474 } 1475 break; 1476 } 1477 1478 /* copy data into U(k,:) */ 1479 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1480 jmin = bi[k]+1; jmax = bi[k+1]; 1481 if (jmin < jmax) { 1482 for (j=jmin; j<jmax; j++){ 1483 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1484 } 1485 /* add the k-th row into il and jl */ 1486 il[k] = jmin; 1487 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1488 } 1489 } 1490 } while (sctx.chshift); 1491 ierr = PetscFree(il);CHKERRQ(ierr); 1492 1493 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1494 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1495 1496 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1497 if (perm_identity){ 1498 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1499 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1500 (*B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1501 (*B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1502 } else { 1503 (*B)->ops->solve = MatSolve_SeqSBAIJ_1; 1504 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1505 (*B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1506 (*B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1507 } 1508 1509 C->assembled = PETSC_TRUE; 1510 C->preallocated = PETSC_TRUE; 1511 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1512 if (sctx.nshift){ 1513 if (shiftnz) { 1514 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1515 } else if (shiftpd) { 1516 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1517 } 1518 } 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1524 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1525 { 1526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1527 Mat_SeqSBAIJ *b; 1528 PetscErrorCode ierr; 1529 PetscTruth perm_identity,missing; 1530 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1531 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1532 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1533 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1534 PetscReal fill=info->fill,levels=info->levels; 1535 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1536 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1537 PetscBT lnkbt; 1538 IS iperm; 1539 1540 PetscFunctionBegin; 1541 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1542 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1543 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1544 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1545 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1546 1547 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1548 ui[0] = 0; 1549 1550 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1551 if (!levels && perm_identity) { 1552 1553 for (i=0; i<am; i++) { 1554 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1555 } 1556 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1557 cols = uj; 1558 for (i=0; i<am; i++) { 1559 aj = a->j + a->diag[i]; 1560 ncols = ui[i+1] - ui[i]; 1561 for (j=0; j<ncols; j++) *cols++ = *aj++; 1562 } 1563 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1564 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1565 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1566 1567 /* initialization */ 1568 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1569 1570 /* jl: linked list for storing indices of the pivot rows 1571 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1572 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1573 il = jl + am; 1574 uj_ptr = (PetscInt**)(il + am); 1575 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1576 for (i=0; i<am; i++){ 1577 jl[i] = am; il[i] = 0; 1578 } 1579 1580 /* create and initialize a linked list for storing column indices of the active row k */ 1581 nlnk = am + 1; 1582 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1583 1584 /* initial FreeSpace size is fill*(ai[am]+1) */ 1585 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1586 current_space = free_space; 1587 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1588 current_space_lvl = free_space_lvl; 1589 1590 for (k=0; k<am; k++){ /* for each active row k */ 1591 /* initialize lnk by the column indices of row rip[k] of A */ 1592 nzk = 0; 1593 ncols = ai[rip[k]+1] - ai[rip[k]]; 1594 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1595 ncols_upper = 0; 1596 for (j=0; j<ncols; j++){ 1597 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1598 if (riip[i] >= k){ /* only take upper triangular entry */ 1599 ajtmp[ncols_upper] = i; 1600 ncols_upper++; 1601 } 1602 } 1603 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1604 nzk += nlnk; 1605 1606 /* update lnk by computing fill-in for each pivot row to be merged in */ 1607 prow = jl[k]; /* 1st pivot row */ 1608 1609 while (prow < k){ 1610 nextprow = jl[prow]; 1611 1612 /* merge prow into k-th row */ 1613 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1614 jmax = ui[prow+1]; 1615 ncols = jmax-jmin; 1616 i = jmin - ui[prow]; 1617 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1618 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1619 j = *(uj - 1); 1620 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1621 nzk += nlnk; 1622 1623 /* update il and jl for prow */ 1624 if (jmin < jmax){ 1625 il[prow] = jmin; 1626 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1627 } 1628 prow = nextprow; 1629 } 1630 1631 /* if free space is not available, make more free space */ 1632 if (current_space->local_remaining<nzk) { 1633 i = am - k + 1; /* num of unfactored rows */ 1634 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1635 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1636 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1637 reallocs++; 1638 } 1639 1640 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1641 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1642 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1643 1644 /* add the k-th row into il and jl */ 1645 if (nzk > 1){ 1646 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1647 jl[k] = jl[i]; jl[i] = k; 1648 il[k] = ui[k] + 1; 1649 } 1650 uj_ptr[k] = current_space->array; 1651 uj_lvl_ptr[k] = current_space_lvl->array; 1652 1653 current_space->array += nzk; 1654 current_space->local_used += nzk; 1655 current_space->local_remaining -= nzk; 1656 1657 current_space_lvl->array += nzk; 1658 current_space_lvl->local_used += nzk; 1659 current_space_lvl->local_remaining -= nzk; 1660 1661 ui[k+1] = ui[k] + nzk; 1662 } 1663 1664 #if defined(PETSC_USE_INFO) 1665 if (ai[am] != 0) { 1666 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1667 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1668 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1669 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1670 } else { 1671 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1672 } 1673 #endif 1674 1675 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1676 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1677 ierr = PetscFree(jl);CHKERRQ(ierr); 1678 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1679 1680 /* destroy list of free space and other temporary array(s) */ 1681 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1682 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1683 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1684 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1685 1686 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1687 1688 /* put together the new matrix in MATSEQSBAIJ format */ 1689 1690 b = (Mat_SeqSBAIJ*)(*fact)->data; 1691 b->singlemalloc = PETSC_FALSE; 1692 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1693 b->j = uj; 1694 b->i = ui; 1695 b->diag = 0; 1696 b->ilen = 0; 1697 b->imax = 0; 1698 b->row = perm; 1699 b->col = perm; 1700 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1701 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1702 b->icol = iperm; 1703 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1704 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1705 ierr = PetscLogObjectMemory((*fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1706 b->maxnz = b->nz = ui[am]; 1707 b->free_a = PETSC_TRUE; 1708 b->free_ij = PETSC_TRUE; 1709 1710 (*fact)->info.factor_mallocs = reallocs; 1711 (*fact)->info.fill_ratio_given = fill; 1712 if (ai[am] != 0) { 1713 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1714 } else { 1715 (*fact)->info.fill_ratio_needed = 0.0; 1716 } 1717 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1723 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1724 { 1725 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1726 Mat_SeqSBAIJ *b; 1727 PetscErrorCode ierr; 1728 PetscTruth perm_identity; 1729 PetscReal fill = info->fill; 1730 PetscInt *rip,*riip,i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1731 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1732 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1733 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1734 PetscBT lnkbt; 1735 IS iperm; 1736 1737 PetscFunctionBegin; 1738 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1739 /* check whether perm is the identity mapping */ 1740 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1741 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1742 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1743 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1744 1745 /* initialization */ 1746 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1747 ui[0] = 0; 1748 1749 /* jl: linked list for storing indices of the pivot rows 1750 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1751 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1752 il = jl + am; 1753 cols = il + am; 1754 ui_ptr = (PetscInt**)(cols + am); 1755 for (i=0; i<am; i++){ 1756 jl[i] = am; il[i] = 0; 1757 } 1758 1759 /* create and initialize a linked list for storing column indices of the active row k */ 1760 nlnk = am + 1; 1761 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1762 1763 /* initial FreeSpace size is fill*(ai[am]+1) */ 1764 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1765 current_space = free_space; 1766 1767 for (k=0; k<am; k++){ /* for each active row k */ 1768 /* initialize lnk by the column indices of row rip[k] of A */ 1769 nzk = 0; 1770 ncols = ai[rip[k]+1] - ai[rip[k]]; 1771 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1772 ncols_upper = 0; 1773 for (j=0; j<ncols; j++){ 1774 i = riip[*(aj + ai[rip[k]] + j)]; 1775 if (i >= k){ /* only take upper triangular entry */ 1776 cols[ncols_upper] = i; 1777 ncols_upper++; 1778 } 1779 } 1780 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1781 nzk += nlnk; 1782 1783 /* update lnk by computing fill-in for each pivot row to be merged in */ 1784 prow = jl[k]; /* 1st pivot row */ 1785 1786 while (prow < k){ 1787 nextprow = jl[prow]; 1788 /* merge prow into k-th row */ 1789 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1790 jmax = ui[prow+1]; 1791 ncols = jmax-jmin; 1792 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1793 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1794 nzk += nlnk; 1795 1796 /* update il and jl for prow */ 1797 if (jmin < jmax){ 1798 il[prow] = jmin; 1799 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1800 } 1801 prow = nextprow; 1802 } 1803 1804 /* if free space is not available, make more free space */ 1805 if (current_space->local_remaining<nzk) { 1806 i = am - k + 1; /* num of unfactored rows */ 1807 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1808 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1809 reallocs++; 1810 } 1811 1812 /* copy data into free space, then initialize lnk */ 1813 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1814 1815 /* add the k-th row into il and jl */ 1816 if (nzk-1 > 0){ 1817 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1818 jl[k] = jl[i]; jl[i] = k; 1819 il[k] = ui[k] + 1; 1820 } 1821 ui_ptr[k] = current_space->array; 1822 current_space->array += nzk; 1823 current_space->local_used += nzk; 1824 current_space->local_remaining -= nzk; 1825 1826 ui[k+1] = ui[k] + nzk; 1827 } 1828 1829 #if defined(PETSC_USE_INFO) 1830 if (ai[am] != 0) { 1831 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1832 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1833 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1834 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1835 } else { 1836 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1837 } 1838 #endif 1839 1840 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1841 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1842 ierr = PetscFree(jl);CHKERRQ(ierr); 1843 1844 /* destroy list of free space and other temporary array(s) */ 1845 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1846 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1847 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1848 1849 /* put together the new matrix in MATSEQSBAIJ format */ 1850 1851 b = (Mat_SeqSBAIJ*)(*fact)->data; 1852 b->singlemalloc = PETSC_FALSE; 1853 b->free_a = PETSC_TRUE; 1854 b->free_ij = PETSC_TRUE; 1855 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1856 b->j = uj; 1857 b->i = ui; 1858 b->diag = 0; 1859 b->ilen = 0; 1860 b->imax = 0; 1861 b->row = perm; 1862 b->col = perm; 1863 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1864 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1865 b->icol = iperm; 1866 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1867 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1868 ierr = PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1869 b->maxnz = b->nz = ui[am]; 1870 1871 (*fact)->info.factor_mallocs = reallocs; 1872 (*fact)->info.fill_ratio_given = fill; 1873 if (ai[am] != 0) { 1874 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1875 } else { 1876 (*fact)->info.fill_ratio_needed = 0.0; 1877 } 1878 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1879 PetscFunctionReturn(0); 1880 } 1881