1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->sorted = PETSC_FALSE; 233 b->singlemalloc = PETSC_FALSE; 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 af = ((double)b->nz)/((double)a->nz) + .001; 248 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 250 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 251 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 252 253 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 254 255 PetscFunctionReturn(0); 256 #endif 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 262 { 263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 264 IS isicol; 265 PetscErrorCode ierr; 266 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 267 PetscInt *bi,*bj,*ajtmp; 268 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 269 PetscReal f; 270 PetscInt nlnk,*lnk,k,**bi_ptr; 271 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 272 PetscBT lnkbt; 273 274 PetscFunctionBegin; 275 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 276 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279 280 /* get new row pointers */ 281 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 282 bi[0] = 0; 283 284 /* bdiag is location of diagonal in factor */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 286 bdiag[0] = 0; 287 288 /* linked list for storing column indices of the active row */ 289 nlnk = n + 1; 290 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 292 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 293 294 /* initial FreeSpace size is f*(ai[n]+1) */ 295 f = info->fill; 296 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 297 current_space = free_space; 298 299 for (i=0; i<n; i++) { 300 /* copy previous fill into linked list */ 301 nzi = 0; 302 nnz = ai[r[i]+1] - ai[r[i]]; 303 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 304 ajtmp = aj + ai[r[i]]; 305 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 306 nzi += nlnk; 307 308 /* add pivot rows into linked list */ 309 row = lnk[n]; 310 while (row < i) { 311 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 312 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 313 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 314 nzi += nlnk; 315 row = lnk[row]; 316 } 317 bi[i+1] = bi[i] + nzi; 318 im[i] = nzi; 319 320 /* mark bdiag */ 321 nzbd = 0; 322 nnz = nzi; 323 k = lnk[n]; 324 while (nnz-- && k < i){ 325 nzbd++; 326 k = lnk[k]; 327 } 328 bdiag[i] = bi[i] + nzbd; 329 330 /* if free space is not available, make more free space */ 331 if (current_space->local_remaining<nzi) { 332 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 333 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 334 reallocs++; 335 } 336 337 /* copy data into free space, then initialize lnk */ 338 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 339 bi_ptr[i] = current_space->array; 340 current_space->array += nzi; 341 current_space->local_used += nzi; 342 current_space->local_remaining -= nzi; 343 } 344 #if defined(PETSC_USE_INFO) 345 if (ai[n] != 0) { 346 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 347 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 349 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 350 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 351 } else { 352 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 353 } 354 #endif 355 356 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 357 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 358 359 /* destroy list of free space and other temporary array(s) */ 360 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 361 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 362 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 363 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 364 365 /* put together the new matrix */ 366 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 367 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 368 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 369 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 370 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 371 b = (Mat_SeqAIJ*)(*B)->data; 372 b->free_a = PETSC_TRUE; 373 b->free_ij = PETSC_TRUE; 374 b->singlemalloc = PETSC_FALSE; 375 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 376 b->j = bj; 377 b->i = bi; 378 b->diag = bdiag; 379 b->ilen = 0; 380 b->imax = 0; 381 b->row = isrow; 382 b->col = iscol; 383 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 385 b->icol = isicol; 386 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 387 388 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 389 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 390 b->maxnz = b->nz = bi[n] ; 391 392 (*B)->factor = FACTOR_LU; 393 (*B)->info.factor_mallocs = reallocs; 394 (*B)->info.fill_ratio_given = f; 395 396 if (ai[n] != 0) { 397 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 398 } else { 399 (*B)->info.fill_ratio_needed = 0.0; 400 } 401 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 402 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 403 PetscFunctionReturn(0); 404 } 405 406 /* 407 Trouble in factorization, should we dump the original matrix? 408 */ 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatFactorDumpMatrix" 411 PetscErrorCode MatFactorDumpMatrix(Mat A) 412 { 413 PetscErrorCode ierr; 414 PetscTruth flg; 415 416 PetscFunctionBegin; 417 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 418 if (flg) { 419 PetscViewer viewer; 420 char filename[PETSC_MAX_PATH_LEN]; 421 422 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 423 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 424 ierr = MatView(A,viewer);CHKERRQ(ierr); 425 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 /* ----------------------------------------------------------- */ 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 433 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 434 { 435 Mat C=*B; 436 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 437 IS isrow = b->row,isicol = b->icol; 438 PetscErrorCode ierr; 439 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 440 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 441 PetscInt *diag_offset = b->diag,diag,*pj; 442 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 443 PetscScalar d; 444 PetscReal rs; 445 LUShift_Ctx sctx; 446 PetscInt newshift,*ddiag; 447 448 PetscFunctionBegin; 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 453 rtmps = rtmp; ics = ic; 454 455 sctx.shift_top = 0; 456 sctx.nshift_max = 0; 457 sctx.shift_lo = 0; 458 sctx.shift_hi = 0; 459 460 /* if both shift schemes are chosen by user, only use info->shiftpd */ 461 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 462 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 463 PetscInt *aai = a->i; 464 ddiag = a->diag; 465 sctx.shift_top = 0; 466 for (i=0; i<n; i++) { 467 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 468 d = (a->a)[ddiag[i]]; 469 rs = -PetscAbsScalar(d) - PetscRealPart(d); 470 v = a->a+aai[i]; 471 nz = aai[i+1] - aai[i]; 472 for (j=0; j<nz; j++) 473 rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 sctx.shift_amount = 0; 484 sctx.nshift = 0; 485 do { 486 sctx.lushift = PETSC_FALSE; 487 for (i=0; i<n; i++){ 488 nz = bi[i+1] - bi[i]; 489 bjtmp = bj + bi[i]; 490 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmp = a->j + a->i[r[i]]; 495 v = a->a + a->i[r[i]]; 496 for (j=0; j<nz; j++) { 497 rtmp[ics[ajtmp[j]]] = v[j]; 498 } 499 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 500 501 row = *bjtmp++; 502 while (row < i) { 503 pc = rtmp + row; 504 if (*pc != 0.0) { 505 pv = b->a + diag_offset[row]; 506 pj = b->j + diag_offset[row] + 1; 507 multiplier = *pc / *pv++; 508 *pc = multiplier; 509 nz = bi[row+1] - diag_offset[row] - 1; 510 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 511 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 512 } 513 row = *bjtmp++; 514 } 515 /* finished row so stick it into b->a */ 516 pv = b->a + bi[i] ; 517 pj = b->j + bi[i] ; 518 nz = bi[i+1] - bi[i]; 519 diag = diag_offset[i] - bi[i]; 520 rs = 0.0; 521 for (j=0; j<nz; j++) { 522 pv[j] = rtmps[pj[j]]; 523 if (j != diag) rs += PetscAbsScalar(pv[j]); 524 } 525 526 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 527 sctx.rs = rs; 528 sctx.pv = pv[diag]; 529 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 530 if (newshift == 1) break; 531 } 532 533 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 534 /* 535 * if no shift in this attempt & shifting & started shifting & can refine, 536 * then try lower shift 537 */ 538 sctx.shift_hi = info->shift_fraction; 539 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 540 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 541 sctx.lushift = PETSC_TRUE; 542 sctx.nshift++; 543 } 544 } while (sctx.lushift); 545 546 /* invert diagonal entries for simplier triangular solves */ 547 for (i=0; i<n; i++) { 548 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 549 } 550 551 ierr = PetscFree(rtmp);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 554 C->factor = FACTOR_LU; 555 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 556 C->assembled = PETSC_TRUE; 557 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 558 if (sctx.nshift){ 559 if (info->shiftnz) { 560 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 561 } else if (info->shiftpd) { 562 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 /* 569 This routine implements inplace ILU(0) with row or/and column permutations. 570 Input: 571 A - original matrix 572 Output; 573 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 574 a->j (col index) is permuted by the inverse of colperm, then sorted 575 a->a reordered accordingly with a->j 576 a->diag (ptr to diagonal elements) is updated. 577 */ 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 581 { 582 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 583 IS isrow = a->row,isicol = a->icol; 584 PetscErrorCode ierr; 585 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 586 PetscInt *ajtmp,nz,row,*ics; 587 PetscInt *diag = a->diag,nbdiag,*pj; 588 PetscScalar *rtmp,*v,*pc,multiplier,*pv,d; 589 PetscReal rs; 590 LUShift_Ctx sctx; 591 PetscInt newshift; 592 593 PetscFunctionBegin; 594 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 595 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 596 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 597 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 598 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 599 ics = ic; 600 601 sctx.shift_top = 0; 602 sctx.nshift_max = 0; 603 sctx.shift_lo = 0; 604 sctx.shift_hi = 0; 605 606 /* if both shift schemes are chosen by user, only use info->shiftpd */ 607 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 608 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 609 sctx.shift_top = 0; 610 for (i=0; i<n; i++) { 611 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 612 d = (a->a)[diag[i]]; 613 rs = -PetscAbsScalar(d) - PetscRealPart(d); 614 v = a->a+ai[i]; 615 nz = ai[i+1] - ai[i]; 616 for (j=0; j<nz; j++) 617 rs += PetscAbsScalar(v[j]); 618 if (rs>sctx.shift_top) sctx.shift_top = rs; 619 } 620 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 621 sctx.shift_top *= 1.1; 622 sctx.nshift_max = 5; 623 sctx.shift_lo = 0.; 624 sctx.shift_hi = 1.; 625 } 626 627 sctx.shift_amount = 0; 628 sctx.nshift = 0; 629 do { 630 sctx.lushift = PETSC_FALSE; 631 for (i=0; i<n; i++){ 632 /* load in initial unfactored row */ 633 nz = ai[r[i]+1] - ai[r[i]]; 634 ajtmp = aj + ai[r[i]]; 635 v = a->a + ai[r[i]]; 636 /* sort permuted ajtmp and values v accordingly */ 637 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 638 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 639 640 diag[r[i]] = ai[r[i]]; 641 for (j=0; j<nz; j++) { 642 rtmp[ajtmp[j]] = v[j]; 643 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 644 } 645 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 646 647 row = *ajtmp++; 648 while (row < i) { 649 pc = rtmp + row; 650 if (*pc != 0.0) { 651 pv = a->a + diag[r[row]]; 652 pj = aj + diag[r[row]] + 1; 653 654 multiplier = *pc / *pv++; 655 *pc = multiplier; 656 nz = ai[r[row]+1] - diag[r[row]] - 1; 657 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 658 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 659 } 660 row = *ajtmp++; 661 } 662 /* finished row so overwrite it onto a->a */ 663 pv = a->a + ai[r[i]] ; 664 pj = aj + ai[r[i]] ; 665 nz = ai[r[i]+1] - ai[r[i]]; 666 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 667 668 rs = 0.0; 669 for (j=0; j<nz; j++) { 670 pv[j] = rtmp[pj[j]]; 671 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 672 } 673 674 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 675 sctx.rs = rs; 676 sctx.pv = pv[nbdiag]; 677 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 678 if (newshift == 1) break; 679 } 680 681 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 682 /* 683 * if no shift in this attempt & shifting & started shifting & can refine, 684 * then try lower shift 685 */ 686 sctx.shift_hi = info->shift_fraction; 687 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 688 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 689 sctx.lushift = PETSC_TRUE; 690 sctx.nshift++; 691 } 692 } while (sctx.lushift); 693 694 /* invert diagonal entries for simplier triangular solves */ 695 for (i=0; i<n; i++) { 696 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 697 } 698 699 ierr = PetscFree(rtmp);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 701 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 702 A->factor = FACTOR_LU; 703 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 704 A->assembled = PETSC_TRUE; 705 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 706 if (sctx.nshift){ 707 if (info->shiftnz) { 708 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 709 } else if (info->shiftpd) { 710 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 711 } 712 } 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 718 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 719 { 720 PetscFunctionBegin; 721 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 722 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 723 PetscFunctionReturn(0); 724 } 725 726 727 /* ----------------------------------------------------------- */ 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatLUFactor_SeqAIJ" 730 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 731 { 732 PetscErrorCode ierr; 733 Mat C; 734 735 PetscFunctionBegin; 736 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 737 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 738 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 739 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 /* ----------------------------------------------------------- */ 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatSolve_SeqAIJ" 745 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 746 { 747 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 748 IS iscol = a->col,isrow = a->row; 749 PetscErrorCode ierr; 750 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 751 PetscInt nz,*rout,*cout; 752 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 753 754 PetscFunctionBegin; 755 if (!n) PetscFunctionReturn(0); 756 757 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 758 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 759 tmp = a->solve_work; 760 761 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 762 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 763 764 /* forward solve the lower triangular */ 765 tmp[0] = b[*r++]; 766 tmps = tmp; 767 for (i=1; i<n; i++) { 768 v = aa + ai[i] ; 769 vi = aj + ai[i] ; 770 nz = a->diag[i] - ai[i]; 771 sum = b[*r++]; 772 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 773 tmp[i] = sum; 774 } 775 776 /* backward solve the upper triangular */ 777 for (i=n-1; i>=0; i--){ 778 v = aa + a->diag[i] + 1; 779 vi = aj + a->diag[i] + 1; 780 nz = ai[i+1] - a->diag[i] - 1; 781 sum = tmp[i]; 782 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 783 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 784 } 785 786 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 787 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 788 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatMatSolve_SeqAIJ" 796 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 797 { 798 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 799 IS iscol = a->col,isrow = a->row; 800 PetscErrorCode ierr; 801 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 802 PetscInt nz,*rout,*cout,neq; 803 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 804 805 PetscFunctionBegin; 806 if (!n) PetscFunctionReturn(0); 807 808 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 809 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 810 811 tmp = a->solve_work; 812 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 813 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 814 815 for (neq=0; neq<n; neq++){ 816 /* forward solve the lower triangular */ 817 tmp[0] = b[r[0]]; 818 tmps = tmp; 819 for (i=1; i<n; i++) { 820 v = aa + ai[i] ; 821 vi = aj + ai[i] ; 822 nz = a->diag[i] - ai[i]; 823 sum = b[r[i]]; 824 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 825 tmp[i] = sum; 826 } 827 /* backward solve the upper triangular */ 828 for (i=n-1; i>=0; i--){ 829 v = aa + a->diag[i] + 1; 830 vi = aj + a->diag[i] + 1; 831 nz = ai[i+1] - a->diag[i] - 1; 832 sum = tmp[i]; 833 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 834 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 835 } 836 837 b += n; 838 x += n; 839 } 840 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 841 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 842 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 843 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 844 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 850 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 851 { 852 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 853 IS iscol = a->col,isrow = a->row; 854 PetscErrorCode ierr; 855 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 856 PetscInt nz,*rout,*cout,row; 857 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 858 859 PetscFunctionBegin; 860 if (!n) PetscFunctionReturn(0); 861 862 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 863 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 864 tmp = a->solve_work; 865 866 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 867 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 868 869 /* forward solve the lower triangular */ 870 tmp[0] = b[*r++]; 871 tmps = tmp; 872 for (row=1; row<n; row++) { 873 i = rout[row]; /* permuted row */ 874 v = aa + ai[i] ; 875 vi = aj + ai[i] ; 876 nz = a->diag[i] - ai[i]; 877 sum = b[*r++]; 878 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 879 tmp[row] = sum; 880 } 881 882 /* backward solve the upper triangular */ 883 for (row=n-1; row>=0; row--){ 884 i = rout[row]; /* permuted row */ 885 v = aa + a->diag[i] + 1; 886 vi = aj + a->diag[i] + 1; 887 nz = ai[i+1] - a->diag[i] - 1; 888 sum = tmp[row]; 889 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 890 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 891 } 892 893 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 894 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 895 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 896 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 897 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 /* ----------------------------------------------------------- */ 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 904 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 905 { 906 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 907 PetscErrorCode ierr; 908 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 909 PetscScalar *x,*b,*aa = a->a; 910 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 911 PetscInt adiag_i,i,*vi,nz,ai_i; 912 PetscScalar *v,sum; 913 #endif 914 915 PetscFunctionBegin; 916 if (!n) PetscFunctionReturn(0); 917 918 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 919 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 920 921 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 922 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 923 #else 924 /* forward solve the lower triangular */ 925 x[0] = b[0]; 926 for (i=1; i<n; i++) { 927 ai_i = ai[i]; 928 v = aa + ai_i; 929 vi = aj + ai_i; 930 nz = adiag[i] - ai_i; 931 sum = b[i]; 932 while (nz--) sum -= *v++ * x[*vi++]; 933 x[i] = sum; 934 } 935 936 /* backward solve the upper triangular */ 937 for (i=n-1; i>=0; i--){ 938 adiag_i = adiag[i]; 939 v = aa + adiag_i + 1; 940 vi = aj + adiag_i + 1; 941 nz = ai[i+1] - adiag_i - 1; 942 sum = x[i]; 943 while (nz--) sum -= *v++ * x[*vi++]; 944 x[i] = sum*aa[adiag_i]; 945 } 946 #endif 947 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 948 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 949 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 955 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 956 { 957 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 958 IS iscol = a->col,isrow = a->row; 959 PetscErrorCode ierr; 960 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 961 PetscInt nz,*rout,*cout; 962 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 963 964 PetscFunctionBegin; 965 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 966 967 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 969 tmp = a->solve_work; 970 971 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 972 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 973 974 /* forward solve the lower triangular */ 975 tmp[0] = b[*r++]; 976 for (i=1; i<n; i++) { 977 v = aa + ai[i] ; 978 vi = aj + ai[i] ; 979 nz = a->diag[i] - ai[i]; 980 sum = b[*r++]; 981 while (nz--) sum -= *v++ * tmp[*vi++ ]; 982 tmp[i] = sum; 983 } 984 985 /* backward solve the upper triangular */ 986 for (i=n-1; i>=0; i--){ 987 v = aa + a->diag[i] + 1; 988 vi = aj + a->diag[i] + 1; 989 nz = ai[i+1] - a->diag[i] - 1; 990 sum = tmp[i]; 991 while (nz--) sum -= *v++ * tmp[*vi++ ]; 992 tmp[i] = sum*aa[a->diag[i]]; 993 x[*c--] += tmp[i]; 994 } 995 996 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 997 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 998 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 999 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1000 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1001 1002 PetscFunctionReturn(0); 1003 } 1004 /* -------------------------------------------------------------------*/ 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1007 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1008 { 1009 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1010 IS iscol = a->col,isrow = a->row; 1011 PetscErrorCode ierr; 1012 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1013 PetscInt nz,*rout,*cout,*diag = a->diag; 1014 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1015 1016 PetscFunctionBegin; 1017 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1018 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1019 tmp = a->solve_work; 1020 1021 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1022 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1023 1024 /* copy the b into temp work space according to permutation */ 1025 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1026 1027 /* forward solve the U^T */ 1028 for (i=0; i<n; i++) { 1029 v = aa + diag[i] ; 1030 vi = aj + diag[i] + 1; 1031 nz = ai[i+1] - diag[i] - 1; 1032 s1 = tmp[i]; 1033 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1034 while (nz--) { 1035 tmp[*vi++ ] -= (*v++)*s1; 1036 } 1037 tmp[i] = s1; 1038 } 1039 1040 /* backward solve the L^T */ 1041 for (i=n-1; i>=0; i--){ 1042 v = aa + diag[i] - 1 ; 1043 vi = aj + diag[i] - 1 ; 1044 nz = diag[i] - ai[i]; 1045 s1 = tmp[i]; 1046 while (nz--) { 1047 tmp[*vi-- ] -= (*v--)*s1; 1048 } 1049 } 1050 1051 /* copy tmp into x according to permutation */ 1052 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1053 1054 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1055 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1057 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1058 1059 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1065 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1066 { 1067 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1068 IS iscol = a->col,isrow = a->row; 1069 PetscErrorCode ierr; 1070 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1071 PetscInt nz,*rout,*cout,*diag = a->diag; 1072 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1073 1074 PetscFunctionBegin; 1075 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1076 1077 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1078 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1079 tmp = a->solve_work; 1080 1081 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1082 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1083 1084 /* copy the b into temp work space according to permutation */ 1085 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1086 1087 /* forward solve the U^T */ 1088 for (i=0; i<n; i++) { 1089 v = aa + diag[i] ; 1090 vi = aj + diag[i] + 1; 1091 nz = ai[i+1] - diag[i] - 1; 1092 tmp[i] *= *v++; 1093 while (nz--) { 1094 tmp[*vi++ ] -= (*v++)*tmp[i]; 1095 } 1096 } 1097 1098 /* backward solve the L^T */ 1099 for (i=n-1; i>=0; i--){ 1100 v = aa + diag[i] - 1 ; 1101 vi = aj + diag[i] - 1 ; 1102 nz = diag[i] - ai[i]; 1103 while (nz--) { 1104 tmp[*vi-- ] -= (*v--)*tmp[i]; 1105 } 1106 } 1107 1108 /* copy tmp into x according to permutation */ 1109 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1110 1111 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1112 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1113 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1115 1116 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 /* ----------------------------------------------------------------*/ 1120 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*); 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1124 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1125 { 1126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1127 IS isicol; 1128 PetscErrorCode ierr; 1129 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1130 PetscInt *bi,*cols,nnz,*cols_lvl; 1131 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1132 PetscInt i,levels,diagonal_fill; 1133 PetscTruth col_identity,row_identity; 1134 PetscReal f; 1135 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1136 PetscBT lnkbt; 1137 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1138 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1139 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1140 PetscTruth missing; 1141 1142 PetscFunctionBegin; 1143 f = info->fill; 1144 levels = (PetscInt)info->levels; 1145 diagonal_fill = (PetscInt)info->diagonal_fill; 1146 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1147 1148 /* special case that simply copies fill pattern */ 1149 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1150 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1151 if (!levels && row_identity && col_identity) { 1152 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1153 (*fact)->factor = FACTOR_LU; 1154 (*fact)->info.factor_mallocs = 0; 1155 (*fact)->info.fill_ratio_given = info->fill; 1156 (*fact)->info.fill_ratio_needed = 1.0; 1157 b = (Mat_SeqAIJ*)(*fact)->data; 1158 ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr); 1159 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1160 b->row = isrow; 1161 b->col = iscol; 1162 b->icol = isicol; 1163 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1164 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1165 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1166 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1171 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1172 1173 /* get new row pointers */ 1174 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1175 bi[0] = 0; 1176 /* bdiag is location of diagonal in factor */ 1177 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1178 bdiag[0] = 0; 1179 1180 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1181 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1182 1183 /* create a linked list for storing column indices of the active row */ 1184 nlnk = n + 1; 1185 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1186 1187 /* initial FreeSpace size is f*(ai[n]+1) */ 1188 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1189 current_space = free_space; 1190 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1191 current_space_lvl = free_space_lvl; 1192 1193 for (i=0; i<n; i++) { 1194 nzi = 0; 1195 /* copy current row into linked list */ 1196 nnz = ai[r[i]+1] - ai[r[i]]; 1197 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1198 cols = aj + ai[r[i]]; 1199 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1200 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1201 nzi += nlnk; 1202 1203 /* make sure diagonal entry is included */ 1204 if (diagonal_fill && lnk[i] == -1) { 1205 fm = n; 1206 while (lnk[fm] < i) fm = lnk[fm]; 1207 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1208 lnk[fm] = i; 1209 lnk_lvl[i] = 0; 1210 nzi++; dcount++; 1211 } 1212 1213 /* add pivot rows into the active row */ 1214 nzbd = 0; 1215 prow = lnk[n]; 1216 while (prow < i) { 1217 nnz = bdiag[prow]; 1218 cols = bj_ptr[prow] + nnz + 1; 1219 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1220 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1221 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1222 nzi += nlnk; 1223 prow = lnk[prow]; 1224 nzbd++; 1225 } 1226 bdiag[i] = nzbd; 1227 bi[i+1] = bi[i] + nzi; 1228 1229 /* if free space is not available, make more free space */ 1230 if (current_space->local_remaining<nzi) { 1231 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1232 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1233 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1234 reallocs++; 1235 } 1236 1237 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1238 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1239 bj_ptr[i] = current_space->array; 1240 bjlvl_ptr[i] = current_space_lvl->array; 1241 1242 /* make sure the active row i has diagonal entry */ 1243 if (*(bj_ptr[i]+bdiag[i]) != i) { 1244 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1245 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1246 } 1247 1248 current_space->array += nzi; 1249 current_space->local_used += nzi; 1250 current_space->local_remaining -= nzi; 1251 current_space_lvl->array += nzi; 1252 current_space_lvl->local_used += nzi; 1253 current_space_lvl->local_remaining -= nzi; 1254 } 1255 1256 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1257 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1258 1259 /* destroy list of free space and other temporary arrays */ 1260 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1261 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1262 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1263 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1264 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1265 1266 #if defined(PETSC_USE_INFO) 1267 { 1268 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1269 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1270 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1271 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1272 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1273 if (diagonal_fill) { 1274 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1275 } 1276 } 1277 #endif 1278 1279 /* put together the new matrix */ 1280 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1281 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1282 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1283 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1285 b = (Mat_SeqAIJ*)(*fact)->data; 1286 b->free_a = PETSC_TRUE; 1287 b->free_ij = PETSC_TRUE; 1288 b->singlemalloc = PETSC_FALSE; 1289 len = (bi[n] )*sizeof(PetscScalar); 1290 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1291 b->j = bj; 1292 b->i = bi; 1293 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1294 b->diag = bdiag; 1295 b->ilen = 0; 1296 b->imax = 0; 1297 b->row = isrow; 1298 b->col = iscol; 1299 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1300 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1301 b->icol = isicol; 1302 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1303 /* In b structure: Free imax, ilen, old a, old j. 1304 Allocate bdiag, solve_work, new a, new j */ 1305 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1306 b->maxnz = b->nz = bi[n] ; 1307 (*fact)->factor = FACTOR_LU; 1308 (*fact)->info.factor_mallocs = reallocs; 1309 (*fact)->info.fill_ratio_given = f; 1310 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1311 1312 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1313 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1314 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #include "src/mat/impls/sbaij/seq/sbaij.h" 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1321 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1322 { 1323 Mat C = *B; 1324 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1325 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1326 IS ip=b->row,iip = b->icol; 1327 PetscErrorCode ierr; 1328 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1329 PetscInt *ai=a->i,*aj=a->j; 1330 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1331 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1332 PetscReal zeropivot,rs,shiftnz; 1333 PetscReal shiftpd; 1334 ChShift_Ctx sctx; 1335 PetscInt newshift; 1336 1337 PetscFunctionBegin; 1338 shiftnz = info->shiftnz; 1339 shiftpd = info->shiftpd; 1340 zeropivot = info->zeropivot; 1341 1342 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1343 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1344 1345 /* initialization */ 1346 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1347 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1348 jl = il + mbs; 1349 rtmp = (MatScalar*)(jl + mbs); 1350 1351 sctx.shift_amount = 0; 1352 sctx.nshift = 0; 1353 do { 1354 sctx.chshift = PETSC_FALSE; 1355 for (i=0; i<mbs; i++) { 1356 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1357 } 1358 1359 for (k = 0; k<mbs; k++){ 1360 bval = ba + bi[k]; 1361 /* initialize k-th row by the perm[k]-th row of A */ 1362 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1363 for (j = jmin; j < jmax; j++){ 1364 col = riip[aj[j]]; 1365 if (col >= k){ /* only take upper triangular entry */ 1366 rtmp[col] = aa[j]; 1367 *bval++ = 0.0; /* for in-place factorization */ 1368 } 1369 } 1370 /* shift the diagonal of the matrix */ 1371 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1372 1373 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1374 dk = rtmp[k]; 1375 i = jl[k]; /* first row to be added to k_th row */ 1376 1377 while (i < k){ 1378 nexti = jl[i]; /* next row to be added to k_th row */ 1379 1380 /* compute multiplier, update diag(k) and U(i,k) */ 1381 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1382 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1383 dk += uikdi*ba[ili]; 1384 ba[ili] = uikdi; /* -U(i,k) */ 1385 1386 /* add multiple of row i to k-th row */ 1387 jmin = ili + 1; jmax = bi[i+1]; 1388 if (jmin < jmax){ 1389 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1390 /* update il and jl for row i */ 1391 il[i] = jmin; 1392 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1393 } 1394 i = nexti; 1395 } 1396 1397 /* shift the diagonals when zero pivot is detected */ 1398 /* compute rs=sum of abs(off-diagonal) */ 1399 rs = 0.0; 1400 jmin = bi[k]+1; 1401 nz = bi[k+1] - jmin; 1402 if (nz > 0){ 1403 bcol = bj + jmin; 1404 while (nz--){ 1405 rs += PetscAbsScalar(rtmp[*bcol]); 1406 bcol++; 1407 } 1408 } 1409 1410 sctx.rs = rs; 1411 sctx.pv = dk; 1412 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1413 if (newshift == 1) break; 1414 1415 /* copy data into U(k,:) */ 1416 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1417 jmin = bi[k]+1; jmax = bi[k+1]; 1418 if (jmin < jmax) { 1419 for (j=jmin; j<jmax; j++){ 1420 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1421 } 1422 /* add the k-th row into il and jl */ 1423 il[k] = jmin; 1424 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1425 } 1426 } 1427 } while (sctx.chshift); 1428 ierr = PetscFree(il);CHKERRQ(ierr); 1429 1430 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1431 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1432 C->factor = FACTOR_CHOLESKY; 1433 C->assembled = PETSC_TRUE; 1434 C->preallocated = PETSC_TRUE; 1435 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1436 if (sctx.nshift){ 1437 if (shiftnz) { 1438 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1439 } else if (shiftpd) { 1440 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1441 } 1442 } 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1448 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1449 { 1450 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1451 Mat_SeqSBAIJ *b; 1452 Mat B; 1453 PetscErrorCode ierr; 1454 PetscTruth perm_identity; 1455 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1456 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1457 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1458 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1459 PetscReal fill=info->fill,levels=info->levels; 1460 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1461 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1462 PetscBT lnkbt; 1463 1464 PetscFunctionBegin; 1465 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1466 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1467 1468 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1469 ui[0] = 0; 1470 1471 /* special case that simply copies fill pattern */ 1472 if (!levels && perm_identity) { 1473 for (i=0; i<am; i++) { 1474 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1475 } 1476 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1477 cols = uj; 1478 for (i=0; i<am; i++) { 1479 aj = a->j + a->diag[i]; 1480 ncols = ui[i+1] - ui[i]; 1481 for (j=0; j<ncols; j++) *cols++ = *aj++; 1482 } 1483 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1484 /* initialization */ 1485 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1486 1487 /* jl: linked list for storing indices of the pivot rows 1488 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1489 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1490 il = jl + am; 1491 uj_ptr = (PetscInt**)(il + am); 1492 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1493 for (i=0; i<am; i++){ 1494 jl[i] = am; il[i] = 0; 1495 } 1496 1497 /* create and initialize a linked list for storing column indices of the active row k */ 1498 nlnk = am + 1; 1499 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1500 1501 /* initial FreeSpace size is fill*(ai[am]+1) */ 1502 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1503 current_space = free_space; 1504 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1505 current_space_lvl = free_space_lvl; 1506 1507 for (k=0; k<am; k++){ /* for each active row k */ 1508 /* initialize lnk by the column indices of row rip[k] of A */ 1509 nzk = 0; 1510 ncols = ai[rip[k]+1] - ai[rip[k]]; 1511 ncols_upper = 0; 1512 for (j=0; j<ncols; j++){ 1513 i = *(aj + ai[rip[k]] + j); 1514 if (rip[i] >= k){ /* only take upper triangular entry */ 1515 ajtmp[ncols_upper] = i; 1516 ncols_upper++; 1517 } 1518 } 1519 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1520 nzk += nlnk; 1521 1522 /* update lnk by computing fill-in for each pivot row to be merged in */ 1523 prow = jl[k]; /* 1st pivot row */ 1524 1525 while (prow < k){ 1526 nextprow = jl[prow]; 1527 1528 /* merge prow into k-th row */ 1529 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1530 jmax = ui[prow+1]; 1531 ncols = jmax-jmin; 1532 i = jmin - ui[prow]; 1533 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1534 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1535 j = *(uj - 1); 1536 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1537 nzk += nlnk; 1538 1539 /* update il and jl for prow */ 1540 if (jmin < jmax){ 1541 il[prow] = jmin; 1542 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1543 } 1544 prow = nextprow; 1545 } 1546 1547 /* if free space is not available, make more free space */ 1548 if (current_space->local_remaining<nzk) { 1549 i = am - k + 1; /* num of unfactored rows */ 1550 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1551 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1552 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1553 reallocs++; 1554 } 1555 1556 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1557 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1558 1559 /* add the k-th row into il and jl */ 1560 if (nzk > 1){ 1561 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1562 jl[k] = jl[i]; jl[i] = k; 1563 il[k] = ui[k] + 1; 1564 } 1565 uj_ptr[k] = current_space->array; 1566 uj_lvl_ptr[k] = current_space_lvl->array; 1567 1568 current_space->array += nzk; 1569 current_space->local_used += nzk; 1570 current_space->local_remaining -= nzk; 1571 1572 current_space_lvl->array += nzk; 1573 current_space_lvl->local_used += nzk; 1574 current_space_lvl->local_remaining -= nzk; 1575 1576 ui[k+1] = ui[k] + nzk; 1577 } 1578 1579 #if defined(PETSC_USE_INFO) 1580 if (ai[am] != 0) { 1581 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1582 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1583 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1584 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1585 } else { 1586 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1587 } 1588 #endif 1589 1590 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1591 ierr = PetscFree(jl);CHKERRQ(ierr); 1592 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1593 1594 /* destroy list of free space and other temporary array(s) */ 1595 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1596 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1597 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1598 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1599 1600 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1601 1602 /* put together the new matrix in MATSEQSBAIJ format */ 1603 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1604 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1605 B = *fact; 1606 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1607 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1608 1609 b = (Mat_SeqSBAIJ*)B->data; 1610 b->singlemalloc = PETSC_FALSE; 1611 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1612 b->j = uj; 1613 b->i = ui; 1614 b->diag = 0; 1615 b->ilen = 0; 1616 b->imax = 0; 1617 b->row = perm; 1618 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1619 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1620 b->icol = perm; 1621 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1622 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1623 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1624 b->maxnz = b->nz = ui[am]; 1625 b->free_a = PETSC_TRUE; 1626 b->free_ij = PETSC_TRUE; 1627 1628 B->factor = FACTOR_CHOLESKY; 1629 B->info.factor_mallocs = reallocs; 1630 B->info.fill_ratio_given = fill; 1631 if (ai[am] != 0) { 1632 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1633 } else { 1634 B->info.fill_ratio_needed = 0.0; 1635 } 1636 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1637 if (perm_identity){ 1638 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1639 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1640 } 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1646 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1647 { 1648 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1649 Mat_SeqSBAIJ *b; 1650 Mat B; 1651 PetscErrorCode ierr; 1652 PetscTruth perm_identity; 1653 PetscReal fill = info->fill; 1654 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1655 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1656 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1657 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1658 PetscBT lnkbt; 1659 IS iperm; 1660 1661 PetscFunctionBegin; 1662 /* check whether perm is the identity mapping */ 1663 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1664 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1665 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1666 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1667 1668 /* initialization */ 1669 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1670 ui[0] = 0; 1671 1672 /* jl: linked list for storing indices of the pivot rows 1673 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1674 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1675 il = jl + am; 1676 cols = il + am; 1677 ui_ptr = (PetscInt**)(cols + am); 1678 for (i=0; i<am; i++){ 1679 jl[i] = am; il[i] = 0; 1680 } 1681 1682 /* create and initialize a linked list for storing column indices of the active row k */ 1683 nlnk = am + 1; 1684 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1685 1686 /* initial FreeSpace size is fill*(ai[am]+1) */ 1687 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1688 current_space = free_space; 1689 1690 for (k=0; k<am; k++){ /* for each active row k */ 1691 /* initialize lnk by the column indices of row rip[k] of A */ 1692 nzk = 0; 1693 ncols = ai[rip[k]+1] - ai[rip[k]]; 1694 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1695 ncols_upper = 0; 1696 for (j=0; j<ncols; j++){ 1697 i = riip[*(aj + ai[rip[k]] + j)]; 1698 if (i >= k){ /* only take upper triangular entry */ 1699 cols[ncols_upper] = i; 1700 ncols_upper++; 1701 } 1702 } 1703 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1704 nzk += nlnk; 1705 1706 /* update lnk by computing fill-in for each pivot row to be merged in */ 1707 prow = jl[k]; /* 1st pivot row */ 1708 1709 while (prow < k){ 1710 nextprow = jl[prow]; 1711 /* merge prow into k-th row */ 1712 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1713 jmax = ui[prow+1]; 1714 ncols = jmax-jmin; 1715 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1716 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1717 nzk += nlnk; 1718 1719 /* update il and jl for prow */ 1720 if (jmin < jmax){ 1721 il[prow] = jmin; 1722 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1723 } 1724 prow = nextprow; 1725 } 1726 1727 /* if free space is not available, make more free space */ 1728 if (current_space->local_remaining<nzk) { 1729 i = am - k + 1; /* num of unfactored rows */ 1730 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1731 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1732 reallocs++; 1733 } 1734 1735 /* copy data into free space, then initialize lnk */ 1736 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1737 1738 /* add the k-th row into il and jl */ 1739 if (nzk-1 > 0){ 1740 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1741 jl[k] = jl[i]; jl[i] = k; 1742 il[k] = ui[k] + 1; 1743 } 1744 ui_ptr[k] = current_space->array; 1745 current_space->array += nzk; 1746 current_space->local_used += nzk; 1747 current_space->local_remaining -= nzk; 1748 1749 ui[k+1] = ui[k] + nzk; 1750 } 1751 1752 #if defined(PETSC_USE_INFO) 1753 if (ai[am] != 0) { 1754 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1755 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1756 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1757 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1758 } else { 1759 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1760 } 1761 #endif 1762 1763 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1764 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1765 ierr = PetscFree(jl);CHKERRQ(ierr); 1766 1767 /* destroy list of free space and other temporary array(s) */ 1768 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1769 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1770 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1771 1772 /* put together the new matrix in MATSEQSBAIJ format */ 1773 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1774 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1775 B = *fact; 1776 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1777 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1778 1779 b = (Mat_SeqSBAIJ*)B->data; 1780 b->singlemalloc = PETSC_FALSE; 1781 b->free_a = PETSC_TRUE; 1782 b->free_ij = PETSC_TRUE; 1783 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1784 b->j = uj; 1785 b->i = ui; 1786 b->diag = 0; 1787 b->ilen = 0; 1788 b->imax = 0; 1789 b->row = perm; 1790 b->col = perm; 1791 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1792 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1793 b->icol = iperm; 1794 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1795 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1796 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1797 b->maxnz = b->nz = ui[am]; 1798 1799 B->factor = FACTOR_CHOLESKY; 1800 B->info.factor_mallocs = reallocs; 1801 B->info.fill_ratio_given = fill; 1802 if (ai[am] != 0) { 1803 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1804 } else { 1805 B->info.fill_ratio_needed = 0.0; 1806 } 1807 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1808 if (perm_identity){ 1809 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1810 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1811 } 1812 PetscFunctionReturn(0); 1813 } 1814