1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 442 PetscScalar d; 443 PetscReal rs; 444 LUShift_Ctx sctx; 445 PetscInt newshift,*ddiag; 446 447 PetscFunctionBegin; 448 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 449 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 450 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 451 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 452 rtmps = rtmp; ics = ic; 453 454 sctx.shift_top = 0; 455 sctx.nshift_max = 0; 456 sctx.shift_lo = 0; 457 sctx.shift_hi = 0; 458 459 /* if both shift schemes are chosen by user, only use info->shiftpd */ 460 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 461 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 462 PetscInt *aai = a->i; 463 ddiag = a->diag; 464 sctx.shift_top = 0; 465 for (i=0; i<n; i++) { 466 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 467 d = (a->a)[ddiag[i]]; 468 rs = -PetscAbsScalar(d) - PetscRealPart(d); 469 v = a->a+aai[i]; 470 nz = aai[i+1] - aai[i]; 471 for (j=0; j<nz; j++) 472 rs += PetscAbsScalar(v[j]); 473 if (rs>sctx.shift_top) sctx.shift_top = rs; 474 } 475 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 476 sctx.shift_top *= 1.1; 477 sctx.nshift_max = 5; 478 sctx.shift_lo = 0.; 479 sctx.shift_hi = 1.; 480 } 481 482 sctx.shift_amount = 0; 483 sctx.nshift = 0; 484 do { 485 sctx.lushift = PETSC_FALSE; 486 for (i=0; i<n; i++){ 487 nz = bi[i+1] - bi[i]; 488 bjtmp = bj + bi[i]; 489 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 490 491 /* load in initial (unfactored row) */ 492 nz = a->i[r[i]+1] - a->i[r[i]]; 493 ajtmp = a->j + a->i[r[i]]; 494 v = a->a + a->i[r[i]]; 495 for (j=0; j<nz; j++) { 496 rtmp[ics[ajtmp[j]]] = v[j]; 497 } 498 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 499 500 row = *bjtmp++; 501 while (row < i) { 502 pc = rtmp + row; 503 if (*pc != 0.0) { 504 pv = b->a + diag_offset[row]; 505 pj = b->j + diag_offset[row] + 1; 506 multiplier = *pc / *pv++; 507 *pc = multiplier; 508 nz = bi[row+1] - diag_offset[row] - 1; 509 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 510 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 511 } 512 row = *bjtmp++; 513 } 514 /* finished row so stick it into b->a */ 515 pv = b->a + bi[i] ; 516 pj = b->j + bi[i] ; 517 nz = bi[i+1] - bi[i]; 518 diag = diag_offset[i] - bi[i]; 519 rs = 0.0; 520 for (j=0; j<nz; j++) { 521 pv[j] = rtmps[pj[j]]; 522 if (j != diag) rs += PetscAbsScalar(pv[j]); 523 } 524 525 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 526 sctx.rs = rs; 527 sctx.pv = pv[diag]; 528 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 529 if (newshift == 1) break; 530 } 531 532 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 533 /* 534 * if no shift in this attempt & shifting & started shifting & can refine, 535 * then try lower shift 536 */ 537 sctx.shift_hi = info->shift_fraction; 538 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 539 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 540 sctx.lushift = PETSC_TRUE; 541 sctx.nshift++; 542 } 543 } while (sctx.lushift); 544 545 /* invert diagonal entries for simplier triangular solves */ 546 for (i=0; i<n; i++) { 547 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 548 } 549 550 ierr = PetscFree(rtmp);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 553 C->factor = FACTOR_LU; 554 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 555 C->assembled = PETSC_TRUE; 556 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 557 if (sctx.nshift){ 558 if (info->shiftnz) { 559 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 560 } else if (info->shiftpd) { 561 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 This routine implements inplace ILU(0) with row or/and column permutations. 569 Input: 570 A - original matrix 571 Output; 572 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 573 a->j (col index) is permuted by the inverse of colperm, then sorted 574 a->a reordered accordingly with a->j 575 a->diag (ptr to diagonal elements) is updated. 576 */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 579 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 580 { 581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 582 IS isrow = a->row,isicol = a->icol; 583 PetscErrorCode ierr; 584 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 585 PetscInt *ajtmp,nz,row,*ics; 586 PetscInt *diag = a->diag,nbdiag,*pj; 587 PetscScalar *rtmp,*v,*pc,multiplier,*pv,d; 588 PetscReal rs; 589 LUShift_Ctx sctx; 590 PetscInt newshift; 591 592 PetscFunctionBegin; 593 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 594 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 595 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 597 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 598 ics = ic; 599 600 sctx.shift_top = 0; 601 sctx.nshift_max = 0; 602 sctx.shift_lo = 0; 603 sctx.shift_hi = 0; 604 605 /* if both shift schemes are chosen by user, only use info->shiftpd */ 606 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 607 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 608 sctx.shift_top = 0; 609 for (i=0; i<n; i++) { 610 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 611 d = (a->a)[diag[i]]; 612 rs = -PetscAbsScalar(d) - PetscRealPart(d); 613 v = a->a+ai[i]; 614 nz = ai[i+1] - ai[i]; 615 for (j=0; j<nz; j++) 616 rs += PetscAbsScalar(v[j]); 617 if (rs>sctx.shift_top) sctx.shift_top = rs; 618 } 619 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 620 sctx.shift_top *= 1.1; 621 sctx.nshift_max = 5; 622 sctx.shift_lo = 0.; 623 sctx.shift_hi = 1.; 624 } 625 626 sctx.shift_amount = 0; 627 sctx.nshift = 0; 628 do { 629 sctx.lushift = PETSC_FALSE; 630 for (i=0; i<n; i++){ 631 /* load in initial unfactored row */ 632 nz = ai[r[i]+1] - ai[r[i]]; 633 ajtmp = aj + ai[r[i]]; 634 v = a->a + ai[r[i]]; 635 /* sort permuted ajtmp and values v accordingly */ 636 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 637 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 638 639 diag[r[i]] = ai[r[i]]; 640 for (j=0; j<nz; j++) { 641 rtmp[ajtmp[j]] = v[j]; 642 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 643 } 644 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 645 646 row = *ajtmp++; 647 while (row < i) { 648 pc = rtmp + row; 649 if (*pc != 0.0) { 650 pv = a->a + diag[r[row]]; 651 pj = aj + diag[r[row]] + 1; 652 653 multiplier = *pc / *pv++; 654 *pc = multiplier; 655 nz = ai[r[row]+1] - diag[r[row]] - 1; 656 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 657 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 658 } 659 row = *ajtmp++; 660 } 661 /* finished row so overwrite it onto a->a */ 662 pv = a->a + ai[r[i]] ; 663 pj = aj + ai[r[i]] ; 664 nz = ai[r[i]+1] - ai[r[i]]; 665 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 666 667 rs = 0.0; 668 for (j=0; j<nz; j++) { 669 pv[j] = rtmp[pj[j]]; 670 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 671 } 672 673 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 674 sctx.rs = rs; 675 sctx.pv = pv[nbdiag]; 676 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 677 if (newshift == 1) break; 678 } 679 680 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 681 /* 682 * if no shift in this attempt & shifting & started shifting & can refine, 683 * then try lower shift 684 */ 685 sctx.shift_hi = info->shift_fraction; 686 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 687 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 688 sctx.lushift = PETSC_TRUE; 689 sctx.nshift++; 690 } 691 } while (sctx.lushift); 692 693 /* invert diagonal entries for simplier triangular solves */ 694 for (i=0; i<n; i++) { 695 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 696 } 697 698 ierr = PetscFree(rtmp);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 701 A->factor = FACTOR_LU; 702 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 703 A->assembled = PETSC_TRUE; 704 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 705 if (sctx.nshift){ 706 if (info->shiftnz) { 707 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 708 } else if (info->shiftpd) { 709 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 710 } 711 } 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 717 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 718 { 719 PetscFunctionBegin; 720 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 721 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 726 /* ----------------------------------------------------------- */ 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatLUFactor_SeqAIJ" 729 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 730 { 731 PetscErrorCode ierr; 732 Mat C; 733 734 PetscFunctionBegin; 735 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 736 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 737 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 738 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 /* ----------------------------------------------------------- */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatSolve_SeqAIJ" 744 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 745 { 746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 747 IS iscol = a->col,isrow = a->row; 748 PetscErrorCode ierr; 749 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 750 PetscInt nz,*rout,*cout; 751 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 752 753 PetscFunctionBegin; 754 if (!n) PetscFunctionReturn(0); 755 756 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 757 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 758 tmp = a->solve_work; 759 760 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 761 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 762 763 /* forward solve the lower triangular */ 764 tmp[0] = b[*r++]; 765 tmps = tmp; 766 for (i=1; i<n; i++) { 767 v = aa + ai[i] ; 768 vi = aj + ai[i] ; 769 nz = a->diag[i] - ai[i]; 770 sum = b[*r++]; 771 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 772 tmp[i] = sum; 773 } 774 775 /* backward solve the upper triangular */ 776 for (i=n-1; i>=0; i--){ 777 v = aa + a->diag[i] + 1; 778 vi = aj + a->diag[i] + 1; 779 nz = ai[i+1] - a->diag[i] - 1; 780 sum = tmp[i]; 781 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 782 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 783 } 784 785 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 786 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 787 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 788 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 789 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatMatSolve_SeqAIJ" 795 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 796 { 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 798 IS iscol = a->col,isrow = a->row; 799 PetscErrorCode ierr; 800 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 801 PetscInt nz,*rout,*cout,neq; 802 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 803 804 PetscFunctionBegin; 805 if (!n) PetscFunctionReturn(0); 806 807 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 808 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 809 810 tmp = a->solve_work; 811 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 812 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 813 814 for (neq=0; neq<n; neq++){ 815 /* forward solve the lower triangular */ 816 tmp[0] = b[r[0]]; 817 tmps = tmp; 818 for (i=1; i<n; i++) { 819 v = aa + ai[i] ; 820 vi = aj + ai[i] ; 821 nz = a->diag[i] - ai[i]; 822 sum = b[r[i]]; 823 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 824 tmp[i] = sum; 825 } 826 /* backward solve the upper triangular */ 827 for (i=n-1; i>=0; i--){ 828 v = aa + a->diag[i] + 1; 829 vi = aj + a->diag[i] + 1; 830 nz = ai[i+1] - a->diag[i] - 1; 831 sum = tmp[i]; 832 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 833 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 834 } 835 836 b += n; 837 x += n; 838 } 839 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 840 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 841 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 842 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 843 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 849 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 850 { 851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 852 IS iscol = a->col,isrow = a->row; 853 PetscErrorCode ierr; 854 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 855 PetscInt nz,*rout,*cout,row; 856 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 857 858 PetscFunctionBegin; 859 if (!n) PetscFunctionReturn(0); 860 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 863 tmp = a->solve_work; 864 865 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 866 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 867 868 /* forward solve the lower triangular */ 869 tmp[0] = b[*r++]; 870 tmps = tmp; 871 for (row=1; row<n; row++) { 872 i = rout[row]; /* permuted row */ 873 v = aa + ai[i] ; 874 vi = aj + ai[i] ; 875 nz = a->diag[i] - ai[i]; 876 sum = b[*r++]; 877 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 878 tmp[row] = sum; 879 } 880 881 /* backward solve the upper triangular */ 882 for (row=n-1; row>=0; row--){ 883 i = rout[row]; /* permuted row */ 884 v = aa + a->diag[i] + 1; 885 vi = aj + a->diag[i] + 1; 886 nz = ai[i+1] - a->diag[i] - 1; 887 sum = tmp[row]; 888 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 889 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 890 } 891 892 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 894 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 896 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 /* ----------------------------------------------------------- */ 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 903 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 904 { 905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 906 PetscErrorCode ierr; 907 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 908 PetscScalar *x,*b,*aa = a->a; 909 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 910 PetscInt adiag_i,i,*vi,nz,ai_i; 911 PetscScalar *v,sum; 912 #endif 913 914 PetscFunctionBegin; 915 if (!n) PetscFunctionReturn(0); 916 917 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 920 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 921 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 922 #else 923 /* forward solve the lower triangular */ 924 x[0] = b[0]; 925 for (i=1; i<n; i++) { 926 ai_i = ai[i]; 927 v = aa + ai_i; 928 vi = aj + ai_i; 929 nz = adiag[i] - ai_i; 930 sum = b[i]; 931 while (nz--) sum -= *v++ * x[*vi++]; 932 x[i] = sum; 933 } 934 935 /* backward solve the upper triangular */ 936 for (i=n-1; i>=0; i--){ 937 adiag_i = adiag[i]; 938 v = aa + adiag_i + 1; 939 vi = aj + adiag_i + 1; 940 nz = ai[i+1] - adiag_i - 1; 941 sum = x[i]; 942 while (nz--) sum -= *v++ * x[*vi++]; 943 x[i] = sum*aa[adiag_i]; 944 } 945 #endif 946 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 954 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 955 { 956 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 957 IS iscol = a->col,isrow = a->row; 958 PetscErrorCode ierr; 959 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 960 PetscInt nz,*rout,*cout; 961 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 962 963 PetscFunctionBegin; 964 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 965 966 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 968 tmp = a->solve_work; 969 970 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 971 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 972 973 /* forward solve the lower triangular */ 974 tmp[0] = b[*r++]; 975 for (i=1; i<n; i++) { 976 v = aa + ai[i] ; 977 vi = aj + ai[i] ; 978 nz = a->diag[i] - ai[i]; 979 sum = b[*r++]; 980 while (nz--) sum -= *v++ * tmp[*vi++ ]; 981 tmp[i] = sum; 982 } 983 984 /* backward solve the upper triangular */ 985 for (i=n-1; i>=0; i--){ 986 v = aa + a->diag[i] + 1; 987 vi = aj + a->diag[i] + 1; 988 nz = ai[i+1] - a->diag[i] - 1; 989 sum = tmp[i]; 990 while (nz--) sum -= *v++ * tmp[*vi++ ]; 991 tmp[i] = sum*aa[a->diag[i]]; 992 x[*c--] += tmp[i]; 993 } 994 995 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 996 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 997 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 999 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1000 1001 PetscFunctionReturn(0); 1002 } 1003 /* -------------------------------------------------------------------*/ 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1006 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1007 { 1008 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1009 IS iscol = a->col,isrow = a->row; 1010 PetscErrorCode ierr; 1011 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1012 PetscInt nz,*rout,*cout,*diag = a->diag; 1013 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1014 1015 PetscFunctionBegin; 1016 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1017 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1018 tmp = a->solve_work; 1019 1020 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1021 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1022 1023 /* copy the b into temp work space according to permutation */ 1024 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1025 1026 /* forward solve the U^T */ 1027 for (i=0; i<n; i++) { 1028 v = aa + diag[i] ; 1029 vi = aj + diag[i] + 1; 1030 nz = ai[i+1] - diag[i] - 1; 1031 s1 = tmp[i]; 1032 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1033 while (nz--) { 1034 tmp[*vi++ ] -= (*v++)*s1; 1035 } 1036 tmp[i] = s1; 1037 } 1038 1039 /* backward solve the L^T */ 1040 for (i=n-1; i>=0; i--){ 1041 v = aa + diag[i] - 1 ; 1042 vi = aj + diag[i] - 1 ; 1043 nz = diag[i] - ai[i]; 1044 s1 = tmp[i]; 1045 while (nz--) { 1046 tmp[*vi-- ] -= (*v--)*s1; 1047 } 1048 } 1049 1050 /* copy tmp into x according to permutation */ 1051 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1052 1053 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1054 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1057 1058 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1064 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1065 { 1066 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1067 IS iscol = a->col,isrow = a->row; 1068 PetscErrorCode ierr; 1069 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1070 PetscInt nz,*rout,*cout,*diag = a->diag; 1071 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1072 1073 PetscFunctionBegin; 1074 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1075 1076 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1077 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1078 tmp = a->solve_work; 1079 1080 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1081 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1082 1083 /* copy the b into temp work space according to permutation */ 1084 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1085 1086 /* forward solve the U^T */ 1087 for (i=0; i<n; i++) { 1088 v = aa + diag[i] ; 1089 vi = aj + diag[i] + 1; 1090 nz = ai[i+1] - diag[i] - 1; 1091 tmp[i] *= *v++; 1092 while (nz--) { 1093 tmp[*vi++ ] -= (*v++)*tmp[i]; 1094 } 1095 } 1096 1097 /* backward solve the L^T */ 1098 for (i=n-1; i>=0; i--){ 1099 v = aa + diag[i] - 1 ; 1100 vi = aj + diag[i] - 1 ; 1101 nz = diag[i] - ai[i]; 1102 while (nz--) { 1103 tmp[*vi-- ] -= (*v--)*tmp[i]; 1104 } 1105 } 1106 1107 /* copy tmp into x according to permutation */ 1108 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1109 1110 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1111 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1112 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 1115 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 /* ----------------------------------------------------------------*/ 1119 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*); 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1123 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1124 { 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1126 IS isicol; 1127 PetscErrorCode ierr; 1128 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1129 PetscInt *bi,*cols,nnz,*cols_lvl; 1130 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1131 PetscInt i,levels,diagonal_fill; 1132 PetscTruth col_identity,row_identity; 1133 PetscReal f; 1134 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1135 PetscBT lnkbt; 1136 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1137 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1138 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1139 PetscTruth missing; 1140 1141 PetscFunctionBegin; 1142 f = info->fill; 1143 levels = (PetscInt)info->levels; 1144 diagonal_fill = (PetscInt)info->diagonal_fill; 1145 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1146 1147 /* special case that simply copies fill pattern */ 1148 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1149 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1150 if (!levels && row_identity && col_identity) { 1151 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1152 (*fact)->factor = FACTOR_LU; 1153 (*fact)->info.factor_mallocs = 0; 1154 (*fact)->info.fill_ratio_given = info->fill; 1155 (*fact)->info.fill_ratio_needed = 1.0; 1156 b = (Mat_SeqAIJ*)(*fact)->data; 1157 ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr); 1158 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1159 b->row = isrow; 1160 b->col = iscol; 1161 b->icol = isicol; 1162 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1163 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1164 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1165 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1170 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1171 1172 /* get new row pointers */ 1173 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1174 bi[0] = 0; 1175 /* bdiag is location of diagonal in factor */ 1176 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1177 bdiag[0] = 0; 1178 1179 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1180 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1181 1182 /* create a linked list for storing column indices of the active row */ 1183 nlnk = n + 1; 1184 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1185 1186 /* initial FreeSpace size is f*(ai[n]+1) */ 1187 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1188 current_space = free_space; 1189 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1190 current_space_lvl = free_space_lvl; 1191 1192 for (i=0; i<n; i++) { 1193 nzi = 0; 1194 /* copy current row into linked list */ 1195 nnz = ai[r[i]+1] - ai[r[i]]; 1196 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1197 cols = aj + ai[r[i]]; 1198 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1199 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1200 nzi += nlnk; 1201 1202 /* make sure diagonal entry is included */ 1203 if (diagonal_fill && lnk[i] == -1) { 1204 fm = n; 1205 while (lnk[fm] < i) fm = lnk[fm]; 1206 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1207 lnk[fm] = i; 1208 lnk_lvl[i] = 0; 1209 nzi++; dcount++; 1210 } 1211 1212 /* add pivot rows into the active row */ 1213 nzbd = 0; 1214 prow = lnk[n]; 1215 while (prow < i) { 1216 nnz = bdiag[prow]; 1217 cols = bj_ptr[prow] + nnz + 1; 1218 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1219 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1220 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1221 nzi += nlnk; 1222 prow = lnk[prow]; 1223 nzbd++; 1224 } 1225 bdiag[i] = nzbd; 1226 bi[i+1] = bi[i] + nzi; 1227 1228 /* if free space is not available, make more free space */ 1229 if (current_space->local_remaining<nzi) { 1230 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1231 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1232 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1233 reallocs++; 1234 } 1235 1236 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1237 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1238 bj_ptr[i] = current_space->array; 1239 bjlvl_ptr[i] = current_space_lvl->array; 1240 1241 /* make sure the active row i has diagonal entry */ 1242 if (*(bj_ptr[i]+bdiag[i]) != i) { 1243 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1244 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1245 } 1246 1247 current_space->array += nzi; 1248 current_space->local_used += nzi; 1249 current_space->local_remaining -= nzi; 1250 current_space_lvl->array += nzi; 1251 current_space_lvl->local_used += nzi; 1252 current_space_lvl->local_remaining -= nzi; 1253 } 1254 1255 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1256 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1257 1258 /* destroy list of free space and other temporary arrays */ 1259 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1260 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1261 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1262 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1263 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1264 1265 #if defined(PETSC_USE_INFO) 1266 { 1267 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1268 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1269 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1270 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1271 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1272 if (diagonal_fill) { 1273 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1274 } 1275 } 1276 #endif 1277 1278 /* put together the new matrix */ 1279 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1280 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1281 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1282 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1284 b = (Mat_SeqAIJ*)(*fact)->data; 1285 b->free_a = PETSC_TRUE; 1286 b->free_ij = PETSC_TRUE; 1287 b->singlemalloc = PETSC_FALSE; 1288 len = (bi[n] )*sizeof(PetscScalar); 1289 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1290 b->j = bj; 1291 b->i = bi; 1292 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1293 b->diag = bdiag; 1294 b->ilen = 0; 1295 b->imax = 0; 1296 b->row = isrow; 1297 b->col = iscol; 1298 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1299 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1300 b->icol = isicol; 1301 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1302 /* In b structure: Free imax, ilen, old a, old j. 1303 Allocate bdiag, solve_work, new a, new j */ 1304 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1305 b->maxnz = b->nz = bi[n] ; 1306 (*fact)->factor = FACTOR_LU; 1307 (*fact)->info.factor_mallocs = reallocs; 1308 (*fact)->info.fill_ratio_given = f; 1309 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1310 1311 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1312 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1313 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #include "src/mat/impls/sbaij/seq/sbaij.h" 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1320 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1321 { 1322 Mat C = *B; 1323 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1324 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1325 IS ip=b->row,iip = b->icol; 1326 PetscErrorCode ierr; 1327 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1328 PetscInt *ai=a->i,*aj=a->j; 1329 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1330 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1331 PetscReal zeropivot,rs,shiftnz; 1332 PetscReal shiftpd; 1333 ChShift_Ctx sctx; 1334 PetscInt newshift; 1335 1336 PetscFunctionBegin; 1337 shiftnz = info->shiftnz; 1338 shiftpd = info->shiftpd; 1339 zeropivot = info->zeropivot; 1340 1341 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1342 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1343 1344 /* initialization */ 1345 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1346 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1347 jl = il + mbs; 1348 rtmp = (MatScalar*)(jl + mbs); 1349 1350 sctx.shift_amount = 0; 1351 sctx.nshift = 0; 1352 do { 1353 sctx.chshift = PETSC_FALSE; 1354 for (i=0; i<mbs; i++) { 1355 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1356 } 1357 1358 for (k = 0; k<mbs; k++){ 1359 bval = ba + bi[k]; 1360 /* initialize k-th row by the perm[k]-th row of A */ 1361 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1362 for (j = jmin; j < jmax; j++){ 1363 col = riip[aj[j]]; 1364 if (col >= k){ /* only take upper triangular entry */ 1365 rtmp[col] = aa[j]; 1366 *bval++ = 0.0; /* for in-place factorization */ 1367 } 1368 } 1369 /* shift the diagonal of the matrix */ 1370 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1371 1372 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1373 dk = rtmp[k]; 1374 i = jl[k]; /* first row to be added to k_th row */ 1375 1376 while (i < k){ 1377 nexti = jl[i]; /* next row to be added to k_th row */ 1378 1379 /* compute multiplier, update diag(k) and U(i,k) */ 1380 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1381 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1382 dk += uikdi*ba[ili]; 1383 ba[ili] = uikdi; /* -U(i,k) */ 1384 1385 /* add multiple of row i to k-th row */ 1386 jmin = ili + 1; jmax = bi[i+1]; 1387 if (jmin < jmax){ 1388 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1389 /* update il and jl for row i */ 1390 il[i] = jmin; 1391 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1392 } 1393 i = nexti; 1394 } 1395 1396 /* shift the diagonals when zero pivot is detected */ 1397 /* compute rs=sum of abs(off-diagonal) */ 1398 rs = 0.0; 1399 jmin = bi[k]+1; 1400 nz = bi[k+1] - jmin; 1401 bcol = bj + jmin; 1402 while (nz--){ 1403 rs += PetscAbsScalar(rtmp[*bcol]); 1404 bcol++; 1405 } 1406 1407 sctx.rs = rs; 1408 sctx.pv = dk; 1409 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1410 if (newshift == 1) break; 1411 1412 /* copy data into U(k,:) */ 1413 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1414 jmin = bi[k]+1; jmax = bi[k+1]; 1415 if (jmin < jmax) { 1416 for (j=jmin; j<jmax; j++){ 1417 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1418 } 1419 /* add the k-th row into il and jl */ 1420 il[k] = jmin; 1421 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1422 } 1423 } 1424 } while (sctx.chshift); 1425 ierr = PetscFree(il);CHKERRQ(ierr); 1426 1427 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1428 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1429 C->factor = FACTOR_CHOLESKY; 1430 C->assembled = PETSC_TRUE; 1431 C->preallocated = PETSC_TRUE; 1432 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1433 if (sctx.nshift){ 1434 if (shiftnz) { 1435 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1436 } else if (shiftpd) { 1437 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1438 } 1439 } 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1445 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1446 { 1447 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1448 Mat_SeqSBAIJ *b; 1449 Mat B; 1450 PetscErrorCode ierr; 1451 PetscTruth perm_identity; 1452 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1453 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1454 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1455 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1456 PetscReal fill=info->fill,levels=info->levels; 1457 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1458 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1459 PetscBT lnkbt; 1460 IS iperm; 1461 1462 PetscFunctionBegin; 1463 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1464 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1465 1466 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1467 ui[0] = 0; 1468 1469 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1470 if (!levels && perm_identity) { 1471 for (i=0; i<am; i++) { 1472 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1473 } 1474 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1475 cols = uj; 1476 for (i=0; i<am; i++) { 1477 aj = a->j + a->diag[i]; 1478 ncols = ui[i+1] - ui[i]; 1479 for (j=0; j<ncols; j++) *cols++ = *aj++; 1480 } 1481 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1482 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1483 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1484 1485 /* initialization */ 1486 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1487 1488 /* jl: linked list for storing indices of the pivot rows 1489 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1490 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1491 il = jl + am; 1492 uj_ptr = (PetscInt**)(il + am); 1493 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1494 for (i=0; i<am; i++){ 1495 jl[i] = am; il[i] = 0; 1496 } 1497 1498 /* create and initialize a linked list for storing column indices of the active row k */ 1499 nlnk = am + 1; 1500 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1501 1502 /* initial FreeSpace size is fill*(ai[am]+1) */ 1503 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1504 current_space = free_space; 1505 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1506 current_space_lvl = free_space_lvl; 1507 1508 for (k=0; k<am; k++){ /* for each active row k */ 1509 /* initialize lnk by the column indices of row rip[k] of A */ 1510 nzk = 0; 1511 ncols = ai[rip[k]+1] - ai[rip[k]]; 1512 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1513 ncols_upper = 0; 1514 for (j=0; j<ncols; j++){ 1515 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1516 if (riip[i] >= k){ /* only take upper triangular entry */ 1517 ajtmp[ncols_upper] = i; 1518 ncols_upper++; 1519 } 1520 } 1521 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1522 nzk += nlnk; 1523 1524 /* update lnk by computing fill-in for each pivot row to be merged in */ 1525 prow = jl[k]; /* 1st pivot row */ 1526 1527 while (prow < k){ 1528 nextprow = jl[prow]; 1529 1530 /* merge prow into k-th row */ 1531 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1532 jmax = ui[prow+1]; 1533 ncols = jmax-jmin; 1534 i = jmin - ui[prow]; 1535 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1536 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1537 j = *(uj - 1); 1538 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1539 nzk += nlnk; 1540 1541 /* update il and jl for prow */ 1542 if (jmin < jmax){ 1543 il[prow] = jmin; 1544 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1545 } 1546 prow = nextprow; 1547 } 1548 1549 /* if free space is not available, make more free space */ 1550 if (current_space->local_remaining<nzk) { 1551 i = am - k + 1; /* num of unfactored rows */ 1552 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1553 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1554 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1555 reallocs++; 1556 } 1557 1558 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1559 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1560 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1561 1562 /* add the k-th row into il and jl */ 1563 if (nzk > 1){ 1564 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1565 jl[k] = jl[i]; jl[i] = k; 1566 il[k] = ui[k] + 1; 1567 } 1568 uj_ptr[k] = current_space->array; 1569 uj_lvl_ptr[k] = current_space_lvl->array; 1570 1571 current_space->array += nzk; 1572 current_space->local_used += nzk; 1573 current_space->local_remaining -= nzk; 1574 1575 current_space_lvl->array += nzk; 1576 current_space_lvl->local_used += nzk; 1577 current_space_lvl->local_remaining -= nzk; 1578 1579 ui[k+1] = ui[k] + nzk; 1580 } 1581 1582 #if defined(PETSC_USE_INFO) 1583 if (ai[am] != 0) { 1584 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1585 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1586 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1587 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1588 } else { 1589 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1590 } 1591 #endif 1592 1593 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1594 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1595 ierr = PetscFree(jl);CHKERRQ(ierr); 1596 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1597 1598 /* destroy list of free space and other temporary array(s) */ 1599 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1600 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1601 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1602 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1603 1604 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1605 1606 /* put together the new matrix in MATSEQSBAIJ format */ 1607 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1608 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1609 B = *fact; 1610 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1611 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1612 1613 b = (Mat_SeqSBAIJ*)B->data; 1614 b->singlemalloc = PETSC_FALSE; 1615 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1616 b->j = uj; 1617 b->i = ui; 1618 b->diag = 0; 1619 b->ilen = 0; 1620 b->imax = 0; 1621 b->row = perm; 1622 b->col = perm; 1623 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1624 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1625 b->icol = iperm; 1626 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1627 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1628 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1629 b->maxnz = b->nz = ui[am]; 1630 b->free_a = PETSC_TRUE; 1631 b->free_ij = PETSC_TRUE; 1632 1633 B->factor = FACTOR_CHOLESKY; 1634 B->info.factor_mallocs = reallocs; 1635 B->info.fill_ratio_given = fill; 1636 if (ai[am] != 0) { 1637 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1638 } else { 1639 B->info.fill_ratio_needed = 0.0; 1640 } 1641 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1642 if (perm_identity){ 1643 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1644 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1645 } 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1651 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1652 { 1653 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1654 Mat_SeqSBAIJ *b; 1655 Mat B; 1656 PetscErrorCode ierr; 1657 PetscTruth perm_identity; 1658 PetscReal fill = info->fill; 1659 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1660 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1661 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1662 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1663 PetscBT lnkbt; 1664 IS iperm; 1665 1666 PetscFunctionBegin; 1667 /* check whether perm is the identity mapping */ 1668 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1669 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1670 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1671 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1672 1673 /* initialization */ 1674 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1675 ui[0] = 0; 1676 1677 /* jl: linked list for storing indices of the pivot rows 1678 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1679 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1680 il = jl + am; 1681 cols = il + am; 1682 ui_ptr = (PetscInt**)(cols + am); 1683 for (i=0; i<am; i++){ 1684 jl[i] = am; il[i] = 0; 1685 } 1686 1687 /* create and initialize a linked list for storing column indices of the active row k */ 1688 nlnk = am + 1; 1689 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1690 1691 /* initial FreeSpace size is fill*(ai[am]+1) */ 1692 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1693 current_space = free_space; 1694 1695 for (k=0; k<am; k++){ /* for each active row k */ 1696 /* initialize lnk by the column indices of row rip[k] of A */ 1697 nzk = 0; 1698 ncols = ai[rip[k]+1] - ai[rip[k]]; 1699 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1700 ncols_upper = 0; 1701 for (j=0; j<ncols; j++){ 1702 i = riip[*(aj + ai[rip[k]] + j)]; 1703 if (i >= k){ /* only take upper triangular entry */ 1704 cols[ncols_upper] = i; 1705 ncols_upper++; 1706 } 1707 } 1708 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1709 nzk += nlnk; 1710 1711 /* update lnk by computing fill-in for each pivot row to be merged in */ 1712 prow = jl[k]; /* 1st pivot row */ 1713 1714 while (prow < k){ 1715 nextprow = jl[prow]; 1716 /* merge prow into k-th row */ 1717 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1718 jmax = ui[prow+1]; 1719 ncols = jmax-jmin; 1720 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1721 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1722 nzk += nlnk; 1723 1724 /* update il and jl for prow */ 1725 if (jmin < jmax){ 1726 il[prow] = jmin; 1727 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1728 } 1729 prow = nextprow; 1730 } 1731 1732 /* if free space is not available, make more free space */ 1733 if (current_space->local_remaining<nzk) { 1734 i = am - k + 1; /* num of unfactored rows */ 1735 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1736 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1737 reallocs++; 1738 } 1739 1740 /* copy data into free space, then initialize lnk */ 1741 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1742 1743 /* add the k-th row into il and jl */ 1744 if (nzk-1 > 0){ 1745 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1746 jl[k] = jl[i]; jl[i] = k; 1747 il[k] = ui[k] + 1; 1748 } 1749 ui_ptr[k] = current_space->array; 1750 current_space->array += nzk; 1751 current_space->local_used += nzk; 1752 current_space->local_remaining -= nzk; 1753 1754 ui[k+1] = ui[k] + nzk; 1755 } 1756 1757 #if defined(PETSC_USE_INFO) 1758 if (ai[am] != 0) { 1759 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1760 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1761 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1762 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1763 } else { 1764 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1765 } 1766 #endif 1767 1768 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1769 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1770 ierr = PetscFree(jl);CHKERRQ(ierr); 1771 1772 /* destroy list of free space and other temporary array(s) */ 1773 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1774 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1775 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1776 1777 /* put together the new matrix in MATSEQSBAIJ format */ 1778 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1779 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1780 B = *fact; 1781 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1782 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1783 1784 b = (Mat_SeqSBAIJ*)B->data; 1785 b->singlemalloc = PETSC_FALSE; 1786 b->free_a = PETSC_TRUE; 1787 b->free_ij = PETSC_TRUE; 1788 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1789 b->j = uj; 1790 b->i = ui; 1791 b->diag = 0; 1792 b->ilen = 0; 1793 b->imax = 0; 1794 b->row = perm; 1795 b->col = perm; 1796 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1797 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1798 b->icol = iperm; 1799 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1800 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1801 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1802 b->maxnz = b->nz = ui[am]; 1803 1804 B->factor = FACTOR_CHOLESKY; 1805 B->info.factor_mallocs = reallocs; 1806 B->info.fill_ratio_given = fill; 1807 if (ai[am] != 0) { 1808 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1809 } else { 1810 B->info.fill_ratio_needed = 0.0; 1811 } 1812 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1813 if (perm_identity){ 1814 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1815 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1816 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1817 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1818 } else { 1819 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1820 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1821 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1822 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1823 } 1824 PetscFunctionReturn(0); 1825 } 1826