1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap->n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = MAT_FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 EXTERN_C_BEGIN 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 261 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 262 { 263 PetscFunctionBegin; 264 *flg = PETSC_TRUE; 265 PetscFunctionReturn(0); 266 } 267 EXTERN_C_END 268 269 EXTERN_C_BEGIN 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 272 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 273 { 274 PetscInt n = A->rmap->n; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 279 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 280 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 281 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 282 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 283 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 284 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 285 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 286 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 287 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 288 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 289 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 290 (*B)->factor = ftype; 291 PetscFunctionReturn(0); 292 } 293 EXTERN_C_END 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 297 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,MatFactorInfo *info) 298 { 299 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 300 IS isicol; 301 PetscErrorCode ierr; 302 PetscInt *r,*ic,i,n=A->rmap->n,*ai=a->i,*aj=a->j; 303 PetscInt *bi,*bj,*ajtmp; 304 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 305 PetscReal f; 306 PetscInt nlnk,*lnk,k,**bi_ptr; 307 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 308 PetscBT lnkbt; 309 310 PetscFunctionBegin; 311 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 312 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 313 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 314 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 315 316 /* get new row pointers */ 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 318 bi[0] = 0; 319 320 /* bdiag is location of diagonal in factor */ 321 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 322 bdiag[0] = 0; 323 324 /* linked list for storing column indices of the active row */ 325 nlnk = n + 1; 326 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327 328 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 329 330 /* initial FreeSpace size is f*(ai[n]+1) */ 331 f = info->fill; 332 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 333 current_space = free_space; 334 335 for (i=0; i<n; i++) { 336 /* copy previous fill into linked list */ 337 nzi = 0; 338 nnz = ai[r[i]+1] - ai[r[i]]; 339 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 340 ajtmp = aj + ai[r[i]]; 341 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 342 nzi += nlnk; 343 344 /* add pivot rows into linked list */ 345 row = lnk[n]; 346 while (row < i) { 347 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 348 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 349 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 350 nzi += nlnk; 351 row = lnk[row]; 352 } 353 bi[i+1] = bi[i] + nzi; 354 im[i] = nzi; 355 356 /* mark bdiag */ 357 nzbd = 0; 358 nnz = nzi; 359 k = lnk[n]; 360 while (nnz-- && k < i){ 361 nzbd++; 362 k = lnk[k]; 363 } 364 bdiag[i] = bi[i] + nzbd; 365 366 /* if free space is not available, make more free space */ 367 if (current_space->local_remaining<nzi) { 368 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 369 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 370 reallocs++; 371 } 372 373 /* copy data into free space, then initialize lnk */ 374 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 375 bi_ptr[i] = current_space->array; 376 current_space->array += nzi; 377 current_space->local_used += nzi; 378 current_space->local_remaining -= nzi; 379 } 380 #if defined(PETSC_USE_INFO) 381 if (ai[n] != 0) { 382 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 383 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 384 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 385 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 386 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 387 } else { 388 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 389 } 390 #endif 391 392 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 393 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 394 395 /* destroy list of free space and other temporary array(s) */ 396 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 397 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 398 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 399 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 400 401 /* put together the new matrix */ 402 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 403 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 404 b = (Mat_SeqAIJ*)(B)->data; 405 b->free_a = PETSC_TRUE; 406 b->free_ij = PETSC_TRUE; 407 b->singlemalloc = PETSC_FALSE; 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 409 b->j = bj; 410 b->i = bi; 411 b->diag = bdiag; 412 b->ilen = 0; 413 b->imax = 0; 414 b->row = isrow; 415 b->col = iscol; 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 420 421 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 422 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 423 b->maxnz = b->nz = bi[n] ; 424 425 (B)->factor = MAT_FACTOR_LU; 426 (B)->info.factor_mallocs = reallocs; 427 (B)->info.fill_ratio_given = f; 428 429 if (ai[n] != 0) { 430 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 431 } else { 432 (B)->info.fill_ratio_needed = 0.0; 433 } 434 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 (B)->ops->solve = MatSolve_SeqAIJ; 436 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 437 /* switch to inodes if appropriate */ 438 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscTruth flg; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 467 468 /* ----------------------------------------------------------- */ 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,MatFactorInfo *info) 472 { 473 Mat C=B; 474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 475 IS isrow = b->row,isicol = b->icol; 476 PetscErrorCode ierr; 477 PetscInt *r,*ic,i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 478 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 479 PetscInt *diag_offset = b->diag,diag,*pj; 480 PetscScalar *rtmp,*pc,multiplier,*rtmps; 481 MatScalar *v,*pv; 482 PetscScalar d; 483 PetscReal rs; 484 LUShift_Ctx sctx; 485 PetscInt newshift,*ddiag; 486 487 PetscFunctionBegin; 488 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 489 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 490 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 491 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 492 rtmps = rtmp; ics = ic; 493 494 sctx.shift_top = 0; 495 sctx.nshift_max = 0; 496 sctx.shift_lo = 0; 497 sctx.shift_hi = 0; 498 499 /* if both shift schemes are chosen by user, only use info->shiftpd */ 500 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 501 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 502 PetscInt *aai = a->i; 503 ddiag = a->diag; 504 sctx.shift_top = 0; 505 for (i=0; i<n; i++) { 506 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 507 d = (a->a)[ddiag[i]]; 508 rs = -PetscAbsScalar(d) - PetscRealPart(d); 509 v = a->a+aai[i]; 510 nz = aai[i+1] - aai[i]; 511 for (j=0; j<nz; j++) 512 rs += PetscAbsScalar(v[j]); 513 if (rs>sctx.shift_top) sctx.shift_top = rs; 514 } 515 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 516 sctx.shift_top *= 1.1; 517 sctx.nshift_max = 5; 518 sctx.shift_lo = 0.; 519 sctx.shift_hi = 1.; 520 } 521 522 sctx.shift_amount = 0; 523 sctx.nshift = 0; 524 do { 525 sctx.lushift = PETSC_FALSE; 526 for (i=0; i<n; i++){ 527 nz = bi[i+1] - bi[i]; 528 bjtmp = bj + bi[i]; 529 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 530 531 /* load in initial (unfactored row) */ 532 nz = a->i[r[i]+1] - a->i[r[i]]; 533 ajtmp = a->j + a->i[r[i]]; 534 v = a->a + a->i[r[i]]; 535 for (j=0; j<nz; j++) { 536 rtmp[ics[ajtmp[j]]] = v[j]; 537 } 538 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 539 540 row = *bjtmp++; 541 while (row < i) { 542 pc = rtmp + row; 543 if (*pc != 0.0) { 544 pv = b->a + diag_offset[row]; 545 pj = b->j + diag_offset[row] + 1; 546 multiplier = *pc / *pv++; 547 *pc = multiplier; 548 nz = bi[row+1] - diag_offset[row] - 1; 549 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 550 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 551 } 552 row = *bjtmp++; 553 } 554 /* finished row so stick it into b->a */ 555 pv = b->a + bi[i] ; 556 pj = b->j + bi[i] ; 557 nz = bi[i+1] - bi[i]; 558 diag = diag_offset[i] - bi[i]; 559 rs = 0.0; 560 for (j=0; j<nz; j++) { 561 pv[j] = rtmps[pj[j]]; 562 if (j != diag) rs += PetscAbsScalar(pv[j]); 563 } 564 565 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 566 sctx.rs = rs; 567 sctx.pv = pv[diag]; 568 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 569 if (newshift == 1) break; 570 } 571 572 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 573 /* 574 * if no shift in this attempt & shifting & started shifting & can refine, 575 * then try lower shift 576 */ 577 sctx.shift_hi = info->shift_fraction; 578 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 579 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 580 sctx.lushift = PETSC_TRUE; 581 sctx.nshift++; 582 } 583 } while (sctx.lushift); 584 585 /* invert diagonal entries for simplier triangular solves */ 586 for (i=0; i<n; i++) { 587 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 588 } 589 ierr = PetscFree(rtmp);CHKERRQ(ierr); 590 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 592 if (b->inode.use) { 593 C->ops->solve = MatSolve_Inode; 594 } else { 595 C->ops->solve = MatSolve_SeqAIJ; 596 } 597 C->ops->solveadd = MatSolveAdd_SeqAIJ; 598 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 599 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 600 C->ops->matsolve = MatMatSolve_SeqAIJ; 601 C->assembled = PETSC_TRUE; 602 C->preallocated = PETSC_TRUE; 603 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 604 if (sctx.nshift){ 605 if (info->shiftnz) { 606 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 607 } else if (info->shiftpd) { 608 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 609 } 610 } 611 PetscFunctionReturn(0); 612 } 613 614 /* 615 This routine implements inplace ILU(0) with row or/and column permutations. 616 Input: 617 A - original matrix 618 Output; 619 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 620 a->j (col index) is permuted by the inverse of colperm, then sorted 621 a->a reordered accordingly with a->j 622 a->diag (ptr to diagonal elements) is updated. 623 */ 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,MatFactorInfo *info) 627 { 628 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 629 IS isrow = a->row,isicol = a->icol; 630 PetscErrorCode ierr; 631 PetscInt *r,*ic,i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 632 PetscInt *ajtmp,nz,row,*ics; 633 PetscInt *diag = a->diag,nbdiag,*pj; 634 PetscScalar *rtmp,*pc,multiplier,d; 635 MatScalar *v,*pv; 636 PetscReal rs; 637 LUShift_Ctx sctx; 638 PetscInt newshift; 639 640 PetscFunctionBegin; 641 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 642 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 643 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 644 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 645 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 646 ics = ic; 647 648 sctx.shift_top = 0; 649 sctx.nshift_max = 0; 650 sctx.shift_lo = 0; 651 sctx.shift_hi = 0; 652 653 /* if both shift schemes are chosen by user, only use info->shiftpd */ 654 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 655 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 656 sctx.shift_top = 0; 657 for (i=0; i<n; i++) { 658 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 659 d = (a->a)[diag[i]]; 660 rs = -PetscAbsScalar(d) - PetscRealPart(d); 661 v = a->a+ai[i]; 662 nz = ai[i+1] - ai[i]; 663 for (j=0; j<nz; j++) 664 rs += PetscAbsScalar(v[j]); 665 if (rs>sctx.shift_top) sctx.shift_top = rs; 666 } 667 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 668 sctx.shift_top *= 1.1; 669 sctx.nshift_max = 5; 670 sctx.shift_lo = 0.; 671 sctx.shift_hi = 1.; 672 } 673 674 sctx.shift_amount = 0; 675 sctx.nshift = 0; 676 do { 677 sctx.lushift = PETSC_FALSE; 678 for (i=0; i<n; i++){ 679 /* load in initial unfactored row */ 680 nz = ai[r[i]+1] - ai[r[i]]; 681 ajtmp = aj + ai[r[i]]; 682 v = a->a + ai[r[i]]; 683 /* sort permuted ajtmp and values v accordingly */ 684 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 685 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 686 687 diag[r[i]] = ai[r[i]]; 688 for (j=0; j<nz; j++) { 689 rtmp[ajtmp[j]] = v[j]; 690 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 691 } 692 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 693 694 row = *ajtmp++; 695 while (row < i) { 696 pc = rtmp + row; 697 if (*pc != 0.0) { 698 pv = a->a + diag[r[row]]; 699 pj = aj + diag[r[row]] + 1; 700 701 multiplier = *pc / *pv++; 702 *pc = multiplier; 703 nz = ai[r[row]+1] - diag[r[row]] - 1; 704 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 705 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 706 } 707 row = *ajtmp++; 708 } 709 /* finished row so overwrite it onto a->a */ 710 pv = a->a + ai[r[i]] ; 711 pj = aj + ai[r[i]] ; 712 nz = ai[r[i]+1] - ai[r[i]]; 713 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 714 715 rs = 0.0; 716 for (j=0; j<nz; j++) { 717 pv[j] = rtmp[pj[j]]; 718 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 719 } 720 721 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 722 sctx.rs = rs; 723 sctx.pv = pv[nbdiag]; 724 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 725 if (newshift == 1) break; 726 } 727 728 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 729 /* 730 * if no shift in this attempt & shifting & started shifting & can refine, 731 * then try lower shift 732 */ 733 sctx.shift_hi = info->shift_fraction; 734 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 735 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 736 sctx.lushift = PETSC_TRUE; 737 sctx.nshift++; 738 } 739 } while (sctx.lushift); 740 741 /* invert diagonal entries for simplier triangular solves */ 742 for (i=0; i<n; i++) { 743 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 744 } 745 746 ierr = PetscFree(rtmp);CHKERRQ(ierr); 747 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 748 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 749 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 750 A->ops->solveadd = MatSolveAdd_SeqAIJ; 751 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 752 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 753 A->assembled = PETSC_TRUE; 754 A->preallocated = PETSC_TRUE; 755 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 756 if (sctx.nshift){ 757 if (info->shiftnz) { 758 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 759 } else if (info->shiftpd) { 760 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 761 } 762 } 763 PetscFunctionReturn(0); 764 } 765 766 /* ----------------------------------------------------------- */ 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatLUFactor_SeqAIJ" 769 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 770 { 771 PetscErrorCode ierr; 772 Mat C; 773 774 PetscFunctionBegin; 775 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 776 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 777 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 778 A->ops->solve = C->ops->solve; 779 A->ops->solvetranspose = C->ops->solvetranspose; 780 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 781 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 782 PetscFunctionReturn(0); 783 } 784 /* ----------------------------------------------------------- */ 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatSolve_SeqAIJ" 787 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 788 { 789 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 790 IS iscol = a->col,isrow = a->row; 791 PetscErrorCode ierr; 792 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 793 PetscInt nz,*rout,*cout; 794 PetscScalar *x,*tmp,*tmps,sum; 795 const PetscScalar *b; 796 const MatScalar *aa = a->a,*v; 797 798 PetscFunctionBegin; 799 if (!n) PetscFunctionReturn(0); 800 801 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 802 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 803 tmp = a->solve_work; 804 805 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 806 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 807 808 /* forward solve the lower triangular */ 809 tmp[0] = b[*r++]; 810 tmps = tmp; 811 for (i=1; i<n; i++) { 812 v = aa + ai[i] ; 813 vi = aj + ai[i] ; 814 nz = a->diag[i] - ai[i]; 815 sum = b[*r++]; 816 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 817 tmp[i] = sum; 818 } 819 820 /* backward solve the upper triangular */ 821 for (i=n-1; i>=0; i--){ 822 v = aa + a->diag[i] + 1; 823 vi = aj + a->diag[i] + 1; 824 nz = ai[i+1] - a->diag[i] - 1; 825 sum = tmp[i]; 826 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 827 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 828 } 829 830 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 831 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 832 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 833 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 834 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMatSolve_SeqAIJ" 840 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 841 { 842 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 843 IS iscol = a->col,isrow = a->row; 844 PetscErrorCode ierr; 845 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 846 PetscInt nz,*rout,*cout,neq; 847 PetscScalar *x,*b,*tmp,*tmps,sum; 848 const MatScalar *aa = a->a,*v; 849 PetscTruth bisdense,xisdense; 850 851 PetscFunctionBegin; 852 if (!n) PetscFunctionReturn(0); 853 854 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 855 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 856 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 857 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 858 859 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 860 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 861 862 tmp = a->solve_work; 863 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 864 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 865 866 for (neq=0; neq<B->cmap->n; neq++){ 867 /* forward solve the lower triangular */ 868 tmp[0] = b[r[0]]; 869 tmps = tmp; 870 for (i=1; i<n; i++) { 871 v = aa + ai[i] ; 872 vi = aj + ai[i] ; 873 nz = a->diag[i] - ai[i]; 874 sum = b[r[i]]; 875 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 876 tmp[i] = sum; 877 } 878 /* backward solve the upper triangular */ 879 for (i=n-1; i>=0; i--){ 880 v = aa + a->diag[i] + 1; 881 vi = aj + a->diag[i] + 1; 882 nz = ai[i+1] - a->diag[i] - 1; 883 sum = tmp[i]; 884 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 885 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 886 } 887 888 b += n; 889 x += n; 890 } 891 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 892 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 893 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 894 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 895 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } 898 899 #undef __FUNCT__ 900 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 901 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 902 { 903 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 904 IS iscol = a->col,isrow = a->row; 905 PetscErrorCode ierr; 906 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 907 PetscInt nz,*rout,*cout,row; 908 PetscScalar *x,*b,*tmp,*tmps,sum; 909 const MatScalar *aa = a->a,*v; 910 911 PetscFunctionBegin; 912 if (!n) PetscFunctionReturn(0); 913 914 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 915 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 916 tmp = a->solve_work; 917 918 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 919 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 920 921 /* forward solve the lower triangular */ 922 tmp[0] = b[*r++]; 923 tmps = tmp; 924 for (row=1; row<n; row++) { 925 i = rout[row]; /* permuted row */ 926 v = aa + ai[i] ; 927 vi = aj + ai[i] ; 928 nz = a->diag[i] - ai[i]; 929 sum = b[*r++]; 930 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 931 tmp[row] = sum; 932 } 933 934 /* backward solve the upper triangular */ 935 for (row=n-1; row>=0; row--){ 936 i = rout[row]; /* permuted row */ 937 v = aa + a->diag[i] + 1; 938 vi = aj + a->diag[i] + 1; 939 nz = ai[i+1] - a->diag[i] - 1; 940 sum = tmp[row]; 941 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 942 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 943 } 944 945 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 946 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 /* ----------------------------------------------------------- */ 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 956 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 957 { 958 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 959 PetscErrorCode ierr; 960 PetscInt n = A->rmap->n; 961 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 962 PetscScalar *x; 963 const PetscScalar *b; 964 const MatScalar *aa = a->a; 965 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 966 PetscInt adiag_i,i,nz,ai_i; 967 const MatScalar *v; 968 PetscScalar sum; 969 #endif 970 971 PetscFunctionBegin; 972 if (!n) PetscFunctionReturn(0); 973 974 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 975 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 976 977 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 978 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 979 #else 980 /* forward solve the lower triangular */ 981 x[0] = b[0]; 982 for (i=1; i<n; i++) { 983 ai_i = ai[i]; 984 v = aa + ai_i; 985 vi = aj + ai_i; 986 nz = adiag[i] - ai_i; 987 sum = b[i]; 988 while (nz--) sum -= *v++ * x[*vi++]; 989 x[i] = sum; 990 } 991 992 /* backward solve the upper triangular */ 993 for (i=n-1; i>=0; i--){ 994 adiag_i = adiag[i]; 995 v = aa + adiag_i + 1; 996 vi = aj + adiag_i + 1; 997 nz = ai[i+1] - adiag_i - 1; 998 sum = x[i]; 999 while (nz--) sum -= *v++ * x[*vi++]; 1000 x[i] = sum*aa[adiag_i]; 1001 } 1002 #endif 1003 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1005 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1011 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1012 { 1013 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1014 IS iscol = a->col,isrow = a->row; 1015 PetscErrorCode ierr; 1016 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1017 PetscInt nz,*rout,*cout; 1018 PetscScalar *x,*b,*tmp,sum; 1019 const MatScalar *aa = a->a,*v; 1020 1021 PetscFunctionBegin; 1022 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1023 1024 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1025 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1026 tmp = a->solve_work; 1027 1028 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1029 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1030 1031 /* forward solve the lower triangular */ 1032 tmp[0] = b[*r++]; 1033 for (i=1; i<n; i++) { 1034 v = aa + ai[i] ; 1035 vi = aj + ai[i] ; 1036 nz = a->diag[i] - ai[i]; 1037 sum = b[*r++]; 1038 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1039 tmp[i] = sum; 1040 } 1041 1042 /* backward solve the upper triangular */ 1043 for (i=n-1; i>=0; i--){ 1044 v = aa + a->diag[i] + 1; 1045 vi = aj + a->diag[i] + 1; 1046 nz = ai[i+1] - a->diag[i] - 1; 1047 sum = tmp[i]; 1048 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1049 tmp[i] = sum*aa[a->diag[i]]; 1050 x[*c--] += tmp[i]; 1051 } 1052 1053 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1054 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1057 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1058 1059 PetscFunctionReturn(0); 1060 } 1061 /* -------------------------------------------------------------------*/ 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1064 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1065 { 1066 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1067 IS iscol = a->col,isrow = a->row; 1068 PetscErrorCode ierr; 1069 PetscInt *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1070 PetscInt nz,*rout,*cout,*diag = a->diag; 1071 PetscScalar *x,*b,*tmp,s1; 1072 const MatScalar *aa = a->a,*v; 1073 1074 PetscFunctionBegin; 1075 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1076 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1077 tmp = a->solve_work; 1078 1079 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1080 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1081 1082 /* copy the b into temp work space according to permutation */ 1083 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1084 1085 /* forward solve the U^T */ 1086 for (i=0; i<n; i++) { 1087 v = aa + diag[i] ; 1088 vi = aj + diag[i] + 1; 1089 nz = ai[i+1] - diag[i] - 1; 1090 s1 = tmp[i]; 1091 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1092 while (nz--) { 1093 tmp[*vi++ ] -= (*v++)*s1; 1094 } 1095 tmp[i] = s1; 1096 } 1097 1098 /* backward solve the L^T */ 1099 for (i=n-1; i>=0; i--){ 1100 v = aa + diag[i] - 1 ; 1101 vi = aj + diag[i] - 1 ; 1102 nz = diag[i] - ai[i]; 1103 s1 = tmp[i]; 1104 while (nz--) { 1105 tmp[*vi-- ] -= (*v--)*s1; 1106 } 1107 } 1108 1109 /* copy tmp into x according to permutation */ 1110 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1111 1112 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1113 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1115 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1116 1117 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1123 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1124 { 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1126 IS iscol = a->col,isrow = a->row; 1127 PetscErrorCode ierr; 1128 PetscInt *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1129 PetscInt nz,*rout,*cout,*diag = a->diag; 1130 PetscScalar *x,*b,*tmp; 1131 const MatScalar *aa = a->a,*v; 1132 1133 PetscFunctionBegin; 1134 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1135 1136 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1137 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1138 tmp = a->solve_work; 1139 1140 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1141 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1142 1143 /* copy the b into temp work space according to permutation */ 1144 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1145 1146 /* forward solve the U^T */ 1147 for (i=0; i<n; i++) { 1148 v = aa + diag[i] ; 1149 vi = aj + diag[i] + 1; 1150 nz = ai[i+1] - diag[i] - 1; 1151 tmp[i] *= *v++; 1152 while (nz--) { 1153 tmp[*vi++ ] -= (*v++)*tmp[i]; 1154 } 1155 } 1156 1157 /* backward solve the L^T */ 1158 for (i=n-1; i>=0; i--){ 1159 v = aa + diag[i] - 1 ; 1160 vi = aj + diag[i] - 1 ; 1161 nz = diag[i] - ai[i]; 1162 while (nz--) { 1163 tmp[*vi-- ] -= (*v--)*tmp[i]; 1164 } 1165 } 1166 1167 /* copy tmp into x according to permutation */ 1168 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1169 1170 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1171 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1172 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1173 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1174 1175 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 /* ----------------------------------------------------------------*/ 1179 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1180 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1184 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,MatFactorInfo *info) 1185 { 1186 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1187 IS isicol; 1188 PetscErrorCode ierr; 1189 PetscInt *r,*ic,n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1190 PetscInt *bi,*cols,nnz,*cols_lvl; 1191 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1192 PetscInt i,levels,diagonal_fill; 1193 PetscTruth col_identity,row_identity; 1194 PetscReal f; 1195 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1196 PetscBT lnkbt; 1197 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1198 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1199 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1200 PetscTruth missing; 1201 1202 PetscFunctionBegin; 1203 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1204 f = info->fill; 1205 levels = (PetscInt)info->levels; 1206 diagonal_fill = (PetscInt)info->diagonal_fill; 1207 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1208 1209 /* special case that simply copies fill pattern */ 1210 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1211 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1212 if (!levels && row_identity && col_identity) { 1213 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1214 fact->factor = MAT_FACTOR_ILU; 1215 (fact)->info.factor_mallocs = 0; 1216 (fact)->info.fill_ratio_given = info->fill; 1217 (fact)->info.fill_ratio_needed = 1.0; 1218 b = (Mat_SeqAIJ*)(fact)->data; 1219 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1220 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1221 b->row = isrow; 1222 b->col = iscol; 1223 b->icol = isicol; 1224 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1225 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1226 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1227 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1228 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1233 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1234 1235 /* get new row pointers */ 1236 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1237 bi[0] = 0; 1238 /* bdiag is location of diagonal in factor */ 1239 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1240 bdiag[0] = 0; 1241 1242 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1243 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1244 1245 /* create a linked list for storing column indices of the active row */ 1246 nlnk = n + 1; 1247 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1248 1249 /* initial FreeSpace size is f*(ai[n]+1) */ 1250 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1251 current_space = free_space; 1252 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1253 current_space_lvl = free_space_lvl; 1254 1255 for (i=0; i<n; i++) { 1256 nzi = 0; 1257 /* copy current row into linked list */ 1258 nnz = ai[r[i]+1] - ai[r[i]]; 1259 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1260 cols = aj + ai[r[i]]; 1261 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1262 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1263 nzi += nlnk; 1264 1265 /* make sure diagonal entry is included */ 1266 if (diagonal_fill && lnk[i] == -1) { 1267 fm = n; 1268 while (lnk[fm] < i) fm = lnk[fm]; 1269 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1270 lnk[fm] = i; 1271 lnk_lvl[i] = 0; 1272 nzi++; dcount++; 1273 } 1274 1275 /* add pivot rows into the active row */ 1276 nzbd = 0; 1277 prow = lnk[n]; 1278 while (prow < i) { 1279 nnz = bdiag[prow]; 1280 cols = bj_ptr[prow] + nnz + 1; 1281 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1282 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1283 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1284 nzi += nlnk; 1285 prow = lnk[prow]; 1286 nzbd++; 1287 } 1288 bdiag[i] = nzbd; 1289 bi[i+1] = bi[i] + nzi; 1290 1291 /* if free space is not available, make more free space */ 1292 if (current_space->local_remaining<nzi) { 1293 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1294 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1295 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1296 reallocs++; 1297 } 1298 1299 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1300 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1301 bj_ptr[i] = current_space->array; 1302 bjlvl_ptr[i] = current_space_lvl->array; 1303 1304 /* make sure the active row i has diagonal entry */ 1305 if (*(bj_ptr[i]+bdiag[i]) != i) { 1306 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1307 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1308 } 1309 1310 current_space->array += nzi; 1311 current_space->local_used += nzi; 1312 current_space->local_remaining -= nzi; 1313 current_space_lvl->array += nzi; 1314 current_space_lvl->local_used += nzi; 1315 current_space_lvl->local_remaining -= nzi; 1316 } 1317 1318 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1319 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1320 1321 /* destroy list of free space and other temporary arrays */ 1322 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1323 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1324 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1325 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1326 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1327 1328 #if defined(PETSC_USE_INFO) 1329 { 1330 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1331 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1332 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1333 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1334 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1335 if (diagonal_fill) { 1336 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1337 } 1338 } 1339 #endif 1340 1341 /* put together the new matrix */ 1342 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1343 b = (Mat_SeqAIJ*)(fact)->data; 1344 b->free_a = PETSC_TRUE; 1345 b->free_ij = PETSC_TRUE; 1346 b->singlemalloc = PETSC_FALSE; 1347 len = (bi[n] )*sizeof(PetscScalar); 1348 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1349 b->j = bj; 1350 b->i = bi; 1351 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1352 b->diag = bdiag; 1353 b->ilen = 0; 1354 b->imax = 0; 1355 b->row = isrow; 1356 b->col = iscol; 1357 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1358 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1359 b->icol = isicol; 1360 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1361 /* In b structure: Free imax, ilen, old a, old j. 1362 Allocate bdiag, solve_work, new a, new j */ 1363 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1364 b->maxnz = b->nz = bi[n] ; 1365 (fact)->info.factor_mallocs = reallocs; 1366 (fact)->info.fill_ratio_given = f; 1367 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1368 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1369 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #include "src/mat/impls/sbaij/seq/sbaij.h" 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1376 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,MatFactorInfo *info) 1377 { 1378 Mat C = B; 1379 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1380 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1381 IS ip=b->row,iip = b->icol; 1382 PetscErrorCode ierr; 1383 PetscInt *rip,*riip,i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1384 PetscInt *ai=a->i,*aj=a->j; 1385 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1386 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1387 PetscReal zeropivot,rs,shiftnz; 1388 PetscReal shiftpd; 1389 ChShift_Ctx sctx; 1390 PetscInt newshift; 1391 PetscTruth perm_identity; 1392 1393 PetscFunctionBegin; 1394 1395 shiftnz = info->shiftnz; 1396 shiftpd = info->shiftpd; 1397 zeropivot = info->zeropivot; 1398 1399 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1400 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1401 1402 /* initialization */ 1403 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1404 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1405 jl = il + mbs; 1406 rtmp = (MatScalar*)(jl + mbs); 1407 1408 sctx.shift_amount = 0; 1409 sctx.nshift = 0; 1410 do { 1411 sctx.chshift = PETSC_FALSE; 1412 for (i=0; i<mbs; i++) { 1413 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1414 } 1415 1416 for (k = 0; k<mbs; k++){ 1417 bval = ba + bi[k]; 1418 /* initialize k-th row by the perm[k]-th row of A */ 1419 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1420 for (j = jmin; j < jmax; j++){ 1421 col = riip[aj[j]]; 1422 if (col >= k){ /* only take upper triangular entry */ 1423 rtmp[col] = aa[j]; 1424 *bval++ = 0.0; /* for in-place factorization */ 1425 } 1426 } 1427 /* shift the diagonal of the matrix */ 1428 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1429 1430 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1431 dk = rtmp[k]; 1432 i = jl[k]; /* first row to be added to k_th row */ 1433 1434 while (i < k){ 1435 nexti = jl[i]; /* next row to be added to k_th row */ 1436 1437 /* compute multiplier, update diag(k) and U(i,k) */ 1438 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1439 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1440 dk += uikdi*ba[ili]; 1441 ba[ili] = uikdi; /* -U(i,k) */ 1442 1443 /* add multiple of row i to k-th row */ 1444 jmin = ili + 1; jmax = bi[i+1]; 1445 if (jmin < jmax){ 1446 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1447 /* update il and jl for row i */ 1448 il[i] = jmin; 1449 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1450 } 1451 i = nexti; 1452 } 1453 1454 /* shift the diagonals when zero pivot is detected */ 1455 /* compute rs=sum of abs(off-diagonal) */ 1456 rs = 0.0; 1457 jmin = bi[k]+1; 1458 nz = bi[k+1] - jmin; 1459 bcol = bj + jmin; 1460 while (nz--){ 1461 rs += PetscAbsScalar(rtmp[*bcol]); 1462 bcol++; 1463 } 1464 1465 sctx.rs = rs; 1466 sctx.pv = dk; 1467 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1468 1469 if (newshift == 1) { 1470 if (!sctx.shift_amount) { 1471 sctx.shift_amount = 1e-5; 1472 } 1473 break; 1474 } 1475 1476 /* copy data into U(k,:) */ 1477 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1478 jmin = bi[k]+1; jmax = bi[k+1]; 1479 if (jmin < jmax) { 1480 for (j=jmin; j<jmax; j++){ 1481 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1482 } 1483 /* add the k-th row into il and jl */ 1484 il[k] = jmin; 1485 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1486 } 1487 } 1488 } while (sctx.chshift); 1489 ierr = PetscFree(il);CHKERRQ(ierr); 1490 1491 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1492 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1493 1494 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1495 if (perm_identity){ 1496 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1497 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1498 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1499 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1500 } else { 1501 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1502 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1503 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1504 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1505 } 1506 1507 C->assembled = PETSC_TRUE; 1508 C->preallocated = PETSC_TRUE; 1509 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1510 if (sctx.nshift){ 1511 if (shiftnz) { 1512 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1513 } else if (shiftpd) { 1514 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1515 } 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1522 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,MatFactorInfo *info) 1523 { 1524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1525 Mat_SeqSBAIJ *b; 1526 PetscErrorCode ierr; 1527 PetscTruth perm_identity,missing; 1528 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1529 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1530 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1531 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1532 PetscReal fill=info->fill,levels=info->levels; 1533 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1534 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1535 PetscBT lnkbt; 1536 IS iperm; 1537 1538 PetscFunctionBegin; 1539 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1540 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1541 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1542 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1543 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1544 1545 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1546 ui[0] = 0; 1547 1548 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1549 if (!levels && perm_identity) { 1550 1551 for (i=0; i<am; i++) { 1552 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1553 } 1554 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1555 cols = uj; 1556 for (i=0; i<am; i++) { 1557 aj = a->j + a->diag[i]; 1558 ncols = ui[i+1] - ui[i]; 1559 for (j=0; j<ncols; j++) *cols++ = *aj++; 1560 } 1561 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1562 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1563 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1564 1565 /* initialization */ 1566 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1567 1568 /* jl: linked list for storing indices of the pivot rows 1569 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1570 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1571 il = jl + am; 1572 uj_ptr = (PetscInt**)(il + am); 1573 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1574 for (i=0; i<am; i++){ 1575 jl[i] = am; il[i] = 0; 1576 } 1577 1578 /* create and initialize a linked list for storing column indices of the active row k */ 1579 nlnk = am + 1; 1580 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1581 1582 /* initial FreeSpace size is fill*(ai[am]+1) */ 1583 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1584 current_space = free_space; 1585 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1586 current_space_lvl = free_space_lvl; 1587 1588 for (k=0; k<am; k++){ /* for each active row k */ 1589 /* initialize lnk by the column indices of row rip[k] of A */ 1590 nzk = 0; 1591 ncols = ai[rip[k]+1] - ai[rip[k]]; 1592 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1593 ncols_upper = 0; 1594 for (j=0; j<ncols; j++){ 1595 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1596 if (riip[i] >= k){ /* only take upper triangular entry */ 1597 ajtmp[ncols_upper] = i; 1598 ncols_upper++; 1599 } 1600 } 1601 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1602 nzk += nlnk; 1603 1604 /* update lnk by computing fill-in for each pivot row to be merged in */ 1605 prow = jl[k]; /* 1st pivot row */ 1606 1607 while (prow < k){ 1608 nextprow = jl[prow]; 1609 1610 /* merge prow into k-th row */ 1611 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1612 jmax = ui[prow+1]; 1613 ncols = jmax-jmin; 1614 i = jmin - ui[prow]; 1615 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1616 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1617 j = *(uj - 1); 1618 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1619 nzk += nlnk; 1620 1621 /* update il and jl for prow */ 1622 if (jmin < jmax){ 1623 il[prow] = jmin; 1624 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1625 } 1626 prow = nextprow; 1627 } 1628 1629 /* if free space is not available, make more free space */ 1630 if (current_space->local_remaining<nzk) { 1631 i = am - k + 1; /* num of unfactored rows */ 1632 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1633 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1634 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1635 reallocs++; 1636 } 1637 1638 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1639 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1640 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1641 1642 /* add the k-th row into il and jl */ 1643 if (nzk > 1){ 1644 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1645 jl[k] = jl[i]; jl[i] = k; 1646 il[k] = ui[k] + 1; 1647 } 1648 uj_ptr[k] = current_space->array; 1649 uj_lvl_ptr[k] = current_space_lvl->array; 1650 1651 current_space->array += nzk; 1652 current_space->local_used += nzk; 1653 current_space->local_remaining -= nzk; 1654 1655 current_space_lvl->array += nzk; 1656 current_space_lvl->local_used += nzk; 1657 current_space_lvl->local_remaining -= nzk; 1658 1659 ui[k+1] = ui[k] + nzk; 1660 } 1661 1662 #if defined(PETSC_USE_INFO) 1663 if (ai[am] != 0) { 1664 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1665 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1666 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1667 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1668 } else { 1669 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1670 } 1671 #endif 1672 1673 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1674 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1675 ierr = PetscFree(jl);CHKERRQ(ierr); 1676 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1677 1678 /* destroy list of free space and other temporary array(s) */ 1679 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1680 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1681 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1682 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1683 1684 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1685 1686 /* put together the new matrix in MATSEQSBAIJ format */ 1687 1688 b = (Mat_SeqSBAIJ*)(fact)->data; 1689 b->singlemalloc = PETSC_FALSE; 1690 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1691 b->j = uj; 1692 b->i = ui; 1693 b->diag = 0; 1694 b->ilen = 0; 1695 b->imax = 0; 1696 b->row = perm; 1697 b->col = perm; 1698 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1699 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1700 b->icol = iperm; 1701 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1702 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1703 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1704 b->maxnz = b->nz = ui[am]; 1705 b->free_a = PETSC_TRUE; 1706 b->free_ij = PETSC_TRUE; 1707 1708 (fact)->info.factor_mallocs = reallocs; 1709 (fact)->info.fill_ratio_given = fill; 1710 if (ai[am] != 0) { 1711 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1712 } else { 1713 (fact)->info.fill_ratio_needed = 0.0; 1714 } 1715 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1721 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,MatFactorInfo *info) 1722 { 1723 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1724 Mat_SeqSBAIJ *b; 1725 PetscErrorCode ierr; 1726 PetscTruth perm_identity; 1727 PetscReal fill = info->fill; 1728 PetscInt *rip,*riip,i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1729 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1730 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1731 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1732 PetscBT lnkbt; 1733 IS iperm; 1734 1735 PetscFunctionBegin; 1736 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1737 /* check whether perm is the identity mapping */ 1738 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1739 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1740 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1741 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1742 1743 /* initialization */ 1744 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1745 ui[0] = 0; 1746 1747 /* jl: linked list for storing indices of the pivot rows 1748 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1749 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1750 il = jl + am; 1751 cols = il + am; 1752 ui_ptr = (PetscInt**)(cols + am); 1753 for (i=0; i<am; i++){ 1754 jl[i] = am; il[i] = 0; 1755 } 1756 1757 /* create and initialize a linked list for storing column indices of the active row k */ 1758 nlnk = am + 1; 1759 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1760 1761 /* initial FreeSpace size is fill*(ai[am]+1) */ 1762 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1763 current_space = free_space; 1764 1765 for (k=0; k<am; k++){ /* for each active row k */ 1766 /* initialize lnk by the column indices of row rip[k] of A */ 1767 nzk = 0; 1768 ncols = ai[rip[k]+1] - ai[rip[k]]; 1769 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1770 ncols_upper = 0; 1771 for (j=0; j<ncols; j++){ 1772 i = riip[*(aj + ai[rip[k]] + j)]; 1773 if (i >= k){ /* only take upper triangular entry */ 1774 cols[ncols_upper] = i; 1775 ncols_upper++; 1776 } 1777 } 1778 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1779 nzk += nlnk; 1780 1781 /* update lnk by computing fill-in for each pivot row to be merged in */ 1782 prow = jl[k]; /* 1st pivot row */ 1783 1784 while (prow < k){ 1785 nextprow = jl[prow]; 1786 /* merge prow into k-th row */ 1787 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1788 jmax = ui[prow+1]; 1789 ncols = jmax-jmin; 1790 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1791 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1792 nzk += nlnk; 1793 1794 /* update il and jl for prow */ 1795 if (jmin < jmax){ 1796 il[prow] = jmin; 1797 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1798 } 1799 prow = nextprow; 1800 } 1801 1802 /* if free space is not available, make more free space */ 1803 if (current_space->local_remaining<nzk) { 1804 i = am - k + 1; /* num of unfactored rows */ 1805 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1806 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1807 reallocs++; 1808 } 1809 1810 /* copy data into free space, then initialize lnk */ 1811 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1812 1813 /* add the k-th row into il and jl */ 1814 if (nzk-1 > 0){ 1815 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1816 jl[k] = jl[i]; jl[i] = k; 1817 il[k] = ui[k] + 1; 1818 } 1819 ui_ptr[k] = current_space->array; 1820 current_space->array += nzk; 1821 current_space->local_used += nzk; 1822 current_space->local_remaining -= nzk; 1823 1824 ui[k+1] = ui[k] + nzk; 1825 } 1826 1827 #if defined(PETSC_USE_INFO) 1828 if (ai[am] != 0) { 1829 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1830 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1831 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1832 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1833 } else { 1834 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1835 } 1836 #endif 1837 1838 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1839 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1840 ierr = PetscFree(jl);CHKERRQ(ierr); 1841 1842 /* destroy list of free space and other temporary array(s) */ 1843 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1844 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1845 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1846 1847 /* put together the new matrix in MATSEQSBAIJ format */ 1848 1849 b = (Mat_SeqSBAIJ*)(fact)->data; 1850 b->singlemalloc = PETSC_FALSE; 1851 b->free_a = PETSC_TRUE; 1852 b->free_ij = PETSC_TRUE; 1853 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1854 b->j = uj; 1855 b->i = ui; 1856 b->diag = 0; 1857 b->ilen = 0; 1858 b->imax = 0; 1859 b->row = perm; 1860 b->col = perm; 1861 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1862 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1863 b->icol = iperm; 1864 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1865 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1866 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1867 b->maxnz = b->nz = ui[am]; 1868 1869 (fact)->info.factor_mallocs = reallocs; 1870 (fact)->info.fill_ratio_given = fill; 1871 if (ai[am] != 0) { 1872 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1873 } else { 1874 (fact)->info.fill_ratio_needed = 0.0; 1875 } 1876 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1877 PetscFunctionReturn(0); 1878 } 1879