1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 9 /* 10 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 11 12 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 13 */ 14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscBool *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0.0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 31 else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 96 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 97 { 98 PetscInt n = A->rmap->n; 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 #if defined(PETSC_USE_COMPLEX) 103 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 104 #endif 105 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 106 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 107 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 108 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 109 110 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 111 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 112 113 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 114 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 115 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 116 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 117 118 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 119 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 120 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 121 (*B)->factortype = ftype; 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 127 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 128 { 129 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 130 IS isicol; 131 PetscErrorCode ierr; 132 const PetscInt *r,*ic; 133 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 134 PetscInt *bi,*bj,*ajtmp; 135 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 136 PetscReal f; 137 PetscInt nlnk,*lnk,k,**bi_ptr; 138 PetscFreeSpaceList free_space=NULL,current_space=NULL; 139 PetscBT lnkbt; 140 PetscBool missing; 141 142 PetscFunctionBegin; 143 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 144 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 145 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 146 147 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 148 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 149 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 150 151 /* get new row pointers */ 152 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 153 bi[0] = 0; 154 155 /* bdiag is location of diagonal in factor */ 156 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 157 bdiag[0] = 0; 158 159 /* linked list for storing column indices of the active row */ 160 nlnk = n + 1; 161 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 162 163 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 164 165 /* initial FreeSpace size is f*(ai[n]+1) */ 166 f = info->fill; 167 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 168 current_space = free_space; 169 170 for (i=0; i<n; i++) { 171 /* copy previous fill into linked list */ 172 nzi = 0; 173 nnz = ai[r[i]+1] - ai[r[i]]; 174 ajtmp = aj + ai[r[i]]; 175 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 176 nzi += nlnk; 177 178 /* add pivot rows into linked list */ 179 row = lnk[n]; 180 while (row < i) { 181 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 182 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 183 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 184 nzi += nlnk; 185 row = lnk[row]; 186 } 187 bi[i+1] = bi[i] + nzi; 188 im[i] = nzi; 189 190 /* mark bdiag */ 191 nzbd = 0; 192 nnz = nzi; 193 k = lnk[n]; 194 while (nnz-- && k < i) { 195 nzbd++; 196 k = lnk[k]; 197 } 198 bdiag[i] = bi[i] + nzbd; 199 200 /* if free space is not available, make more free space */ 201 if (current_space->local_remaining<nzi) { 202 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 203 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 204 reallocs++; 205 } 206 207 /* copy data into free space, then initialize lnk */ 208 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 209 210 bi_ptr[i] = current_space->array; 211 current_space->array += nzi; 212 current_space->local_used += nzi; 213 current_space->local_remaining -= nzi; 214 } 215 #if defined(PETSC_USE_INFO) 216 if (ai[n] != 0) { 217 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 218 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 219 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 220 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 221 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 222 } else { 223 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 224 } 225 #endif 226 227 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 228 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 229 230 /* destroy list of free space and other temporary array(s) */ 231 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 232 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 233 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 234 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 235 236 /* put together the new matrix */ 237 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 238 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 239 b = (Mat_SeqAIJ*)(B)->data; 240 241 b->free_a = PETSC_TRUE; 242 b->free_ij = PETSC_TRUE; 243 b->singlemalloc = PETSC_FALSE; 244 245 ierr = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr); 246 b->j = bj; 247 b->i = bi; 248 b->diag = bdiag; 249 b->ilen = 0; 250 b->imax = 0; 251 b->row = isrow; 252 b->col = iscol; 253 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 254 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 255 b->icol = isicol; 256 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 257 258 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 259 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 260 b->maxnz = b->nz = bi[n]; 261 262 (B)->factortype = MAT_FACTOR_LU; 263 (B)->info.factor_mallocs = reallocs; 264 (B)->info.fill_ratio_given = f; 265 266 if (ai[n]) { 267 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 268 } else { 269 (B)->info.fill_ratio_needed = 0.0; 270 } 271 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 272 if (a->inode.size) { 273 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 280 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 281 { 282 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 283 IS isicol; 284 PetscErrorCode ierr; 285 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 286 PetscInt i,n=A->rmap->n; 287 PetscInt *bi,*bj; 288 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 289 PetscReal f; 290 PetscInt nlnk,*lnk,k,**bi_ptr; 291 PetscFreeSpaceList free_space=NULL,current_space=NULL; 292 PetscBT lnkbt; 293 PetscBool missing; 294 295 PetscFunctionBegin; 296 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 297 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 298 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 299 300 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 301 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 302 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 303 304 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 305 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 306 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 307 bi[0] = bdiag[0] = 0; 308 309 /* linked list for storing column indices of the active row */ 310 nlnk = n + 1; 311 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 313 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 314 315 /* initial FreeSpace size is f*(ai[n]+1) */ 316 f = info->fill; 317 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 318 current_space = free_space; 319 320 for (i=0; i<n; i++) { 321 /* copy previous fill into linked list */ 322 nzi = 0; 323 nnz = ai[r[i]+1] - ai[r[i]]; 324 ajtmp = aj + ai[r[i]]; 325 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 326 nzi += nlnk; 327 328 /* add pivot rows into linked list */ 329 row = lnk[n]; 330 while (row < i) { 331 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 332 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 333 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 334 nzi += nlnk; 335 row = lnk[row]; 336 } 337 bi[i+1] = bi[i] + nzi; 338 im[i] = nzi; 339 340 /* mark bdiag */ 341 nzbd = 0; 342 nnz = nzi; 343 k = lnk[n]; 344 while (nnz-- && k < i) { 345 nzbd++; 346 k = lnk[k]; 347 } 348 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 349 350 /* if free space is not available, make more free space */ 351 if (current_space->local_remaining<nzi) { 352 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 353 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 354 reallocs++; 355 } 356 357 /* copy data into free space, then initialize lnk */ 358 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 359 360 bi_ptr[i] = current_space->array; 361 current_space->array += nzi; 362 current_space->local_used += nzi; 363 current_space->local_remaining -= nzi; 364 } 365 366 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 367 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 368 369 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 370 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 371 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 373 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 374 375 /* put together the new matrix */ 376 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 377 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 378 b = (Mat_SeqAIJ*)(B)->data; 379 380 b->free_a = PETSC_TRUE; 381 b->free_ij = PETSC_TRUE; 382 b->singlemalloc = PETSC_FALSE; 383 384 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 385 386 b->j = bj; 387 b->i = bi; 388 b->diag = bdiag; 389 b->ilen = 0; 390 b->imax = 0; 391 b->row = isrow; 392 b->col = iscol; 393 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 394 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 395 b->icol = isicol; 396 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 397 398 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 399 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 400 b->maxnz = b->nz = bdiag[0]+1; 401 402 B->factortype = MAT_FACTOR_LU; 403 B->info.factor_mallocs = reallocs; 404 B->info.fill_ratio_given = f; 405 406 if (ai[n]) { 407 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 408 } else { 409 B->info.fill_ratio_needed = 0.0; 410 } 411 #if defined(PETSC_USE_INFO) 412 if (ai[n] != 0) { 413 PetscReal af = B->info.fill_ratio_needed; 414 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 415 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 416 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 417 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 418 } else { 419 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 420 } 421 #endif 422 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 423 if (a->inode.size) { 424 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 425 } 426 ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr); 427 PetscFunctionReturn(0); 428 } 429 430 /* 431 Trouble in factorization, should we dump the original matrix? 432 */ 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatFactorDumpMatrix" 435 PetscErrorCode MatFactorDumpMatrix(Mat A) 436 { 437 PetscErrorCode ierr; 438 PetscBool flg = PETSC_FALSE; 439 440 PetscFunctionBegin; 441 ierr = PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 442 if (flg) { 443 PetscViewer viewer; 444 char filename[PETSC_MAX_PATH_LEN]; 445 446 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 447 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 448 ierr = MatView(A,viewer);CHKERRQ(ierr); 449 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 450 } 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 456 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 457 { 458 Mat C =B; 459 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 460 IS isrow = b->row,isicol = b->icol; 461 PetscErrorCode ierr; 462 const PetscInt *r,*ic,*ics; 463 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 464 PetscInt i,j,k,nz,nzL,row,*pj; 465 const PetscInt *ajtmp,*bjtmp; 466 MatScalar *rtmp,*pc,multiplier,*pv; 467 const MatScalar *aa=a->a,*v; 468 PetscBool row_identity,col_identity; 469 FactorShiftCtx sctx; 470 const PetscInt *ddiag; 471 PetscReal rs; 472 MatScalar d; 473 474 PetscFunctionBegin; 475 /* MatPivotSetUp(): initialize shift context sctx */ 476 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 477 478 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 479 ddiag = a->diag; 480 sctx.shift_top = info->zeropivot; 481 for (i=0; i<n; i++) { 482 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 483 d = (aa)[ddiag[i]]; 484 rs = -PetscAbsScalar(d) - PetscRealPart(d); 485 v = aa+ai[i]; 486 nz = ai[i+1] - ai[i]; 487 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 488 if (rs>sctx.shift_top) sctx.shift_top = rs; 489 } 490 sctx.shift_top *= 1.1; 491 sctx.nshift_max = 5; 492 sctx.shift_lo = 0.; 493 sctx.shift_hi = 1.; 494 } 495 496 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 497 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 498 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 499 ics = ic; 500 501 do { 502 sctx.newshift = PETSC_FALSE; 503 for (i=0; i<n; i++) { 504 /* zero rtmp */ 505 /* L part */ 506 nz = bi[i+1] - bi[i]; 507 bjtmp = bj + bi[i]; 508 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 509 510 /* U part */ 511 nz = bdiag[i]-bdiag[i+1]; 512 bjtmp = bj + bdiag[i+1]+1; 513 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 514 515 /* load in initial (unfactored row) */ 516 nz = ai[r[i]+1] - ai[r[i]]; 517 ajtmp = aj + ai[r[i]]; 518 v = aa + ai[r[i]]; 519 for (j=0; j<nz; j++) { 520 rtmp[ics[ajtmp[j]]] = v[j]; 521 } 522 /* ZeropivotApply() */ 523 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 524 525 /* elimination */ 526 bjtmp = bj + bi[i]; 527 row = *bjtmp++; 528 nzL = bi[i+1] - bi[i]; 529 for (k=0; k < nzL; k++) { 530 pc = rtmp + row; 531 if (*pc != 0.0) { 532 pv = b->a + bdiag[row]; 533 multiplier = *pc * (*pv); 534 *pc = multiplier; 535 536 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 537 pv = b->a + bdiag[row+1]+1; 538 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 539 540 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 541 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 542 } 543 row = *bjtmp++; 544 } 545 546 /* finished row so stick it into b->a */ 547 rs = 0.0; 548 /* L part */ 549 pv = b->a + bi[i]; 550 pj = b->j + bi[i]; 551 nz = bi[i+1] - bi[i]; 552 for (j=0; j<nz; j++) { 553 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 554 } 555 556 /* U part */ 557 pv = b->a + bdiag[i+1]+1; 558 pj = b->j + bdiag[i+1]+1; 559 nz = bdiag[i] - bdiag[i+1]-1; 560 for (j=0; j<nz; j++) { 561 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 562 } 563 564 sctx.rs = rs; 565 sctx.pv = rtmp[i]; 566 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 567 if (sctx.newshift) break; /* break for-loop */ 568 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 569 570 /* Mark diagonal and invert diagonal for simplier triangular solves */ 571 pv = b->a + bdiag[i]; 572 *pv = 1.0/rtmp[i]; 573 574 } /* endof for (i=0; i<n; i++) { */ 575 576 /* MatPivotRefine() */ 577 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 578 /* 579 * if no shift in this attempt & shifting & started shifting & can refine, 580 * then try lower shift 581 */ 582 sctx.shift_hi = sctx.shift_fraction; 583 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 584 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 585 sctx.newshift = PETSC_TRUE; 586 sctx.nshift++; 587 } 588 } while (sctx.newshift); 589 590 ierr = PetscFree(rtmp);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 593 594 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 595 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 596 if (b->inode.size) { 597 C->ops->solve = MatSolve_SeqAIJ_Inode; 598 } else if (row_identity && col_identity) { 599 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 600 } else { 601 C->ops->solve = MatSolve_SeqAIJ; 602 } 603 C->ops->solveadd = MatSolveAdd_SeqAIJ; 604 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 605 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 606 C->ops->matsolve = MatMatSolve_SeqAIJ; 607 C->assembled = PETSC_TRUE; 608 C->preallocated = PETSC_TRUE; 609 610 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 611 612 /* MatShiftView(A,info,&sctx) */ 613 if (sctx.nshift) { 614 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 615 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 616 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 617 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 618 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 619 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 628 { 629 Mat C =B; 630 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 631 IS isrow = b->row,isicol = b->icol; 632 PetscErrorCode ierr; 633 const PetscInt *r,*ic,*ics; 634 PetscInt nz,row,i,j,n=A->rmap->n,diag; 635 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 636 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 637 MatScalar *pv,*rtmp,*pc,multiplier,d; 638 const MatScalar *v,*aa=a->a; 639 PetscReal rs=0.0; 640 FactorShiftCtx sctx; 641 const PetscInt *ddiag; 642 PetscBool row_identity, col_identity; 643 644 PetscFunctionBegin; 645 /* MatPivotSetUp(): initialize shift context sctx */ 646 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 647 648 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 649 ddiag = a->diag; 650 sctx.shift_top = info->zeropivot; 651 for (i=0; i<n; i++) { 652 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 653 d = (aa)[ddiag[i]]; 654 rs = -PetscAbsScalar(d) - PetscRealPart(d); 655 v = aa+ai[i]; 656 nz = ai[i+1] - ai[i]; 657 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 658 if (rs>sctx.shift_top) sctx.shift_top = rs; 659 } 660 sctx.shift_top *= 1.1; 661 sctx.nshift_max = 5; 662 sctx.shift_lo = 0.; 663 sctx.shift_hi = 1.; 664 } 665 666 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 667 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 668 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 669 ics = ic; 670 671 do { 672 sctx.newshift = PETSC_FALSE; 673 for (i=0; i<n; i++) { 674 nz = bi[i+1] - bi[i]; 675 bjtmp = bj + bi[i]; 676 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 677 678 /* load in initial (unfactored row) */ 679 nz = ai[r[i]+1] - ai[r[i]]; 680 ajtmp = aj + ai[r[i]]; 681 v = aa + ai[r[i]]; 682 for (j=0; j<nz; j++) { 683 rtmp[ics[ajtmp[j]]] = v[j]; 684 } 685 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 686 687 row = *bjtmp++; 688 while (row < i) { 689 pc = rtmp + row; 690 if (*pc != 0.0) { 691 pv = b->a + diag_offset[row]; 692 pj = b->j + diag_offset[row] + 1; 693 multiplier = *pc / *pv++; 694 *pc = multiplier; 695 nz = bi[row+1] - diag_offset[row] - 1; 696 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 697 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 698 } 699 row = *bjtmp++; 700 } 701 /* finished row so stick it into b->a */ 702 pv = b->a + bi[i]; 703 pj = b->j + bi[i]; 704 nz = bi[i+1] - bi[i]; 705 diag = diag_offset[i] - bi[i]; 706 rs = 0.0; 707 for (j=0; j<nz; j++) { 708 pv[j] = rtmp[pj[j]]; 709 rs += PetscAbsScalar(pv[j]); 710 } 711 rs -= PetscAbsScalar(pv[diag]); 712 713 sctx.rs = rs; 714 sctx.pv = pv[diag]; 715 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 716 if (sctx.newshift) break; 717 pv[diag] = sctx.pv; 718 } 719 720 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 721 /* 722 * if no shift in this attempt & shifting & started shifting & can refine, 723 * then try lower shift 724 */ 725 sctx.shift_hi = sctx.shift_fraction; 726 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 727 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 728 sctx.newshift = PETSC_TRUE; 729 sctx.nshift++; 730 } 731 } while (sctx.newshift); 732 733 /* invert diagonal entries for simplier triangular solves */ 734 for (i=0; i<n; i++) { 735 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 736 } 737 ierr = PetscFree(rtmp);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 739 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 740 741 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 742 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 743 if (row_identity && col_identity) { 744 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 745 } else { 746 C->ops->solve = MatSolve_SeqAIJ_inplace; 747 } 748 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 749 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 750 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 751 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 752 753 C->assembled = PETSC_TRUE; 754 C->preallocated = PETSC_TRUE; 755 756 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 757 if (sctx.nshift) { 758 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 759 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 760 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 761 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 762 } 763 } 764 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 765 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 766 767 ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 /* 772 This routine implements inplace ILU(0) with row or/and column permutations. 773 Input: 774 A - original matrix 775 Output; 776 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 777 a->j (col index) is permuted by the inverse of colperm, then sorted 778 a->a reordered accordingly with a->j 779 a->diag (ptr to diagonal elements) is updated. 780 */ 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 784 { 785 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 786 IS isrow = a->row,isicol = a->icol; 787 PetscErrorCode ierr; 788 const PetscInt *r,*ic,*ics; 789 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 790 PetscInt *ajtmp,nz,row; 791 PetscInt *diag = a->diag,nbdiag,*pj; 792 PetscScalar *rtmp,*pc,multiplier,d; 793 MatScalar *pv,*v; 794 PetscReal rs; 795 FactorShiftCtx sctx; 796 const MatScalar *aa=a->a,*vtmp; 797 798 PetscFunctionBegin; 799 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 800 801 /* MatPivotSetUp(): initialize shift context sctx */ 802 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 803 804 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 805 const PetscInt *ddiag = a->diag; 806 sctx.shift_top = info->zeropivot; 807 for (i=0; i<n; i++) { 808 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 809 d = (aa)[ddiag[i]]; 810 rs = -PetscAbsScalar(d) - PetscRealPart(d); 811 vtmp = aa+ai[i]; 812 nz = ai[i+1] - ai[i]; 813 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 814 if (rs>sctx.shift_top) sctx.shift_top = rs; 815 } 816 sctx.shift_top *= 1.1; 817 sctx.nshift_max = 5; 818 sctx.shift_lo = 0.; 819 sctx.shift_hi = 1.; 820 } 821 822 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 823 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 824 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 825 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 826 ics = ic; 827 828 #if defined(MV) 829 sctx.shift_top = 0.; 830 sctx.nshift_max = 0; 831 sctx.shift_lo = 0.; 832 sctx.shift_hi = 0.; 833 sctx.shift_fraction = 0.; 834 835 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 836 sctx.shift_top = 0.; 837 for (i=0; i<n; i++) { 838 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 839 d = (a->a)[diag[i]]; 840 rs = -PetscAbsScalar(d) - PetscRealPart(d); 841 v = a->a+ai[i]; 842 nz = ai[i+1] - ai[i]; 843 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 844 if (rs>sctx.shift_top) sctx.shift_top = rs; 845 } 846 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 847 sctx.shift_top *= 1.1; 848 sctx.nshift_max = 5; 849 sctx.shift_lo = 0.; 850 sctx.shift_hi = 1.; 851 } 852 853 sctx.shift_amount = 0.; 854 sctx.nshift = 0; 855 #endif 856 857 do { 858 sctx.newshift = PETSC_FALSE; 859 for (i=0; i<n; i++) { 860 /* load in initial unfactored row */ 861 nz = ai[r[i]+1] - ai[r[i]]; 862 ajtmp = aj + ai[r[i]]; 863 v = a->a + ai[r[i]]; 864 /* sort permuted ajtmp and values v accordingly */ 865 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 866 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 867 868 diag[r[i]] = ai[r[i]]; 869 for (j=0; j<nz; j++) { 870 rtmp[ajtmp[j]] = v[j]; 871 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 872 } 873 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 874 875 row = *ajtmp++; 876 while (row < i) { 877 pc = rtmp + row; 878 if (*pc != 0.0) { 879 pv = a->a + diag[r[row]]; 880 pj = aj + diag[r[row]] + 1; 881 882 multiplier = *pc / *pv++; 883 *pc = multiplier; 884 nz = ai[r[row]+1] - diag[r[row]] - 1; 885 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 886 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 887 } 888 row = *ajtmp++; 889 } 890 /* finished row so overwrite it onto a->a */ 891 pv = a->a + ai[r[i]]; 892 pj = aj + ai[r[i]]; 893 nz = ai[r[i]+1] - ai[r[i]]; 894 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 895 896 rs = 0.0; 897 for (j=0; j<nz; j++) { 898 pv[j] = rtmp[pj[j]]; 899 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 900 } 901 902 sctx.rs = rs; 903 sctx.pv = pv[nbdiag]; 904 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 905 if (sctx.newshift) break; 906 pv[nbdiag] = sctx.pv; 907 } 908 909 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 910 /* 911 * if no shift in this attempt & shifting & started shifting & can refine, 912 * then try lower shift 913 */ 914 sctx.shift_hi = sctx.shift_fraction; 915 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 916 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 917 sctx.newshift = PETSC_TRUE; 918 sctx.nshift++; 919 } 920 } while (sctx.newshift); 921 922 /* invert diagonal entries for simplier triangular solves */ 923 for (i=0; i<n; i++) { 924 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 925 } 926 927 ierr = PetscFree(rtmp);CHKERRQ(ierr); 928 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 929 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 930 931 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 932 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 933 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 934 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 935 936 A->assembled = PETSC_TRUE; 937 A->preallocated = PETSC_TRUE; 938 939 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 940 if (sctx.nshift) { 941 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 942 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 943 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 944 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 945 } 946 } 947 PetscFunctionReturn(0); 948 } 949 950 /* ----------------------------------------------------------- */ 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatLUFactor_SeqAIJ" 953 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 954 { 955 PetscErrorCode ierr; 956 Mat C; 957 958 PetscFunctionBegin; 959 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 960 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 961 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 962 963 A->ops->solve = C->ops->solve; 964 A->ops->solvetranspose = C->ops->solvetranspose; 965 966 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 967 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 /* ----------------------------------------------------------- */ 971 972 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 975 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 976 { 977 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 978 IS iscol = a->col,isrow = a->row; 979 PetscErrorCode ierr; 980 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 981 PetscInt nz; 982 const PetscInt *rout,*cout,*r,*c; 983 PetscScalar *x,*tmp,*tmps,sum; 984 const PetscScalar *b; 985 const MatScalar *aa = a->a,*v; 986 987 PetscFunctionBegin; 988 if (!n) PetscFunctionReturn(0); 989 990 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 991 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 992 tmp = a->solve_work; 993 994 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 995 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 996 997 /* forward solve the lower triangular */ 998 tmp[0] = b[*r++]; 999 tmps = tmp; 1000 for (i=1; i<n; i++) { 1001 v = aa + ai[i]; 1002 vi = aj + ai[i]; 1003 nz = a->diag[i] - ai[i]; 1004 sum = b[*r++]; 1005 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1006 tmp[i] = sum; 1007 } 1008 1009 /* backward solve the upper triangular */ 1010 for (i=n-1; i>=0; i--) { 1011 v = aa + a->diag[i] + 1; 1012 vi = aj + a->diag[i] + 1; 1013 nz = ai[i+1] - a->diag[i] - 1; 1014 sum = tmp[i]; 1015 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1016 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1017 } 1018 1019 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1020 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1021 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1022 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1023 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1029 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1030 { 1031 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1032 IS iscol = a->col,isrow = a->row; 1033 PetscErrorCode ierr; 1034 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1035 PetscInt nz,neq; 1036 const PetscInt *rout,*cout,*r,*c; 1037 PetscScalar *x,*b,*tmp,*tmps,sum; 1038 const MatScalar *aa = a->a,*v; 1039 PetscBool bisdense,xisdense; 1040 1041 PetscFunctionBegin; 1042 if (!n) PetscFunctionReturn(0); 1043 1044 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1045 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1046 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1047 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1048 1049 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1050 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1051 1052 tmp = a->solve_work; 1053 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1054 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1055 1056 for (neq=0; neq<B->cmap->n; neq++) { 1057 /* forward solve the lower triangular */ 1058 tmp[0] = b[r[0]]; 1059 tmps = tmp; 1060 for (i=1; i<n; i++) { 1061 v = aa + ai[i]; 1062 vi = aj + ai[i]; 1063 nz = a->diag[i] - ai[i]; 1064 sum = b[r[i]]; 1065 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1066 tmp[i] = sum; 1067 } 1068 /* backward solve the upper triangular */ 1069 for (i=n-1; i>=0; i--) { 1070 v = aa + a->diag[i] + 1; 1071 vi = aj + a->diag[i] + 1; 1072 nz = ai[i+1] - a->diag[i] - 1; 1073 sum = tmp[i]; 1074 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1075 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1076 } 1077 1078 b += n; 1079 x += n; 1080 } 1081 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1082 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1083 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1084 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1085 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1091 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1092 { 1093 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1094 IS iscol = a->col,isrow = a->row; 1095 PetscErrorCode ierr; 1096 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1097 PetscInt nz,neq; 1098 const PetscInt *rout,*cout,*r,*c; 1099 PetscScalar *x,*b,*tmp,sum; 1100 const MatScalar *aa = a->a,*v; 1101 PetscBool bisdense,xisdense; 1102 1103 PetscFunctionBegin; 1104 if (!n) PetscFunctionReturn(0); 1105 1106 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1107 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1108 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1109 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1110 1111 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1112 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1113 1114 tmp = a->solve_work; 1115 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1116 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1117 1118 for (neq=0; neq<B->cmap->n; neq++) { 1119 /* forward solve the lower triangular */ 1120 tmp[0] = b[r[0]]; 1121 v = aa; 1122 vi = aj; 1123 for (i=1; i<n; i++) { 1124 nz = ai[i+1] - ai[i]; 1125 sum = b[r[i]]; 1126 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1127 tmp[i] = sum; 1128 v += nz; vi += nz; 1129 } 1130 1131 /* backward solve the upper triangular */ 1132 for (i=n-1; i>=0; i--) { 1133 v = aa + adiag[i+1]+1; 1134 vi = aj + adiag[i+1]+1; 1135 nz = adiag[i]-adiag[i+1]-1; 1136 sum = tmp[i]; 1137 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1138 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1139 } 1140 1141 b += n; 1142 x += n; 1143 } 1144 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1145 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1146 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1147 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1148 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1154 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1155 { 1156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1157 IS iscol = a->col,isrow = a->row; 1158 PetscErrorCode ierr; 1159 const PetscInt *r,*c,*rout,*cout; 1160 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1161 PetscInt nz,row; 1162 PetscScalar *x,*b,*tmp,*tmps,sum; 1163 const MatScalar *aa = a->a,*v; 1164 1165 PetscFunctionBegin; 1166 if (!n) PetscFunctionReturn(0); 1167 1168 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1169 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1170 tmp = a->solve_work; 1171 1172 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1173 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1174 1175 /* forward solve the lower triangular */ 1176 tmp[0] = b[*r++]; 1177 tmps = tmp; 1178 for (row=1; row<n; row++) { 1179 i = rout[row]; /* permuted row */ 1180 v = aa + ai[i]; 1181 vi = aj + ai[i]; 1182 nz = a->diag[i] - ai[i]; 1183 sum = b[*r++]; 1184 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1185 tmp[row] = sum; 1186 } 1187 1188 /* backward solve the upper triangular */ 1189 for (row=n-1; row>=0; row--) { 1190 i = rout[row]; /* permuted row */ 1191 v = aa + a->diag[i] + 1; 1192 vi = aj + a->diag[i] + 1; 1193 nz = ai[i+1] - a->diag[i] - 1; 1194 sum = tmp[row]; 1195 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1196 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1197 } 1198 1199 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1200 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1202 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1203 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 /* ----------------------------------------------------------- */ 1208 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1211 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1212 { 1213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1214 PetscErrorCode ierr; 1215 PetscInt n = A->rmap->n; 1216 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1217 PetscScalar *x; 1218 const PetscScalar *b; 1219 const MatScalar *aa = a->a; 1220 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1221 PetscInt adiag_i,i,nz,ai_i; 1222 const PetscInt *vi; 1223 const MatScalar *v; 1224 PetscScalar sum; 1225 #endif 1226 1227 PetscFunctionBegin; 1228 if (!n) PetscFunctionReturn(0); 1229 1230 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1231 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1232 1233 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1234 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1235 #else 1236 /* forward solve the lower triangular */ 1237 x[0] = b[0]; 1238 for (i=1; i<n; i++) { 1239 ai_i = ai[i]; 1240 v = aa + ai_i; 1241 vi = aj + ai_i; 1242 nz = adiag[i] - ai_i; 1243 sum = b[i]; 1244 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1245 x[i] = sum; 1246 } 1247 1248 /* backward solve the upper triangular */ 1249 for (i=n-1; i>=0; i--) { 1250 adiag_i = adiag[i]; 1251 v = aa + adiag_i + 1; 1252 vi = aj + adiag_i + 1; 1253 nz = ai[i+1] - adiag_i - 1; 1254 sum = x[i]; 1255 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1256 x[i] = sum*aa[adiag_i]; 1257 } 1258 #endif 1259 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1260 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1261 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1267 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1268 { 1269 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1270 IS iscol = a->col,isrow = a->row; 1271 PetscErrorCode ierr; 1272 PetscInt i, n = A->rmap->n,j; 1273 PetscInt nz; 1274 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1275 PetscScalar *x,*tmp,sum; 1276 const PetscScalar *b; 1277 const MatScalar *aa = a->a,*v; 1278 1279 PetscFunctionBegin; 1280 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1281 1282 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1283 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1284 tmp = a->solve_work; 1285 1286 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1287 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1288 1289 /* forward solve the lower triangular */ 1290 tmp[0] = b[*r++]; 1291 for (i=1; i<n; i++) { 1292 v = aa + ai[i]; 1293 vi = aj + ai[i]; 1294 nz = a->diag[i] - ai[i]; 1295 sum = b[*r++]; 1296 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1297 tmp[i] = sum; 1298 } 1299 1300 /* backward solve the upper triangular */ 1301 for (i=n-1; i>=0; i--) { 1302 v = aa + a->diag[i] + 1; 1303 vi = aj + a->diag[i] + 1; 1304 nz = ai[i+1] - a->diag[i] - 1; 1305 sum = tmp[i]; 1306 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1307 tmp[i] = sum*aa[a->diag[i]]; 1308 x[*c--] += tmp[i]; 1309 } 1310 1311 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1312 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1313 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1314 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1315 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1321 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1322 { 1323 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1324 IS iscol = a->col,isrow = a->row; 1325 PetscErrorCode ierr; 1326 PetscInt i, n = A->rmap->n,j; 1327 PetscInt nz; 1328 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1329 PetscScalar *x,*tmp,sum; 1330 const PetscScalar *b; 1331 const MatScalar *aa = a->a,*v; 1332 1333 PetscFunctionBegin; 1334 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1335 1336 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1337 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1338 tmp = a->solve_work; 1339 1340 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1341 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1342 1343 /* forward solve the lower triangular */ 1344 tmp[0] = b[r[0]]; 1345 v = aa; 1346 vi = aj; 1347 for (i=1; i<n; i++) { 1348 nz = ai[i+1] - ai[i]; 1349 sum = b[r[i]]; 1350 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1351 tmp[i] = sum; 1352 v += nz; 1353 vi += nz; 1354 } 1355 1356 /* backward solve the upper triangular */ 1357 v = aa + adiag[n-1]; 1358 vi = aj + adiag[n-1]; 1359 for (i=n-1; i>=0; i--) { 1360 nz = adiag[i] - adiag[i+1] - 1; 1361 sum = tmp[i]; 1362 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1363 tmp[i] = sum*v[nz]; 1364 x[c[i]] += tmp[i]; 1365 v += nz+1; vi += nz+1; 1366 } 1367 1368 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1369 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1370 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1371 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1372 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1378 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1379 { 1380 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1381 IS iscol = a->col,isrow = a->row; 1382 PetscErrorCode ierr; 1383 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1384 PetscInt i,n = A->rmap->n,j; 1385 PetscInt nz; 1386 PetscScalar *x,*tmp,s1; 1387 const MatScalar *aa = a->a,*v; 1388 const PetscScalar *b; 1389 1390 PetscFunctionBegin; 1391 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1392 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1393 tmp = a->solve_work; 1394 1395 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1396 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1397 1398 /* copy the b into temp work space according to permutation */ 1399 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1400 1401 /* forward solve the U^T */ 1402 for (i=0; i<n; i++) { 1403 v = aa + diag[i]; 1404 vi = aj + diag[i] + 1; 1405 nz = ai[i+1] - diag[i] - 1; 1406 s1 = tmp[i]; 1407 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1408 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1409 tmp[i] = s1; 1410 } 1411 1412 /* backward solve the L^T */ 1413 for (i=n-1; i>=0; i--) { 1414 v = aa + diag[i] - 1; 1415 vi = aj + diag[i] - 1; 1416 nz = diag[i] - ai[i]; 1417 s1 = tmp[i]; 1418 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1419 } 1420 1421 /* copy tmp into x according to permutation */ 1422 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1423 1424 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1425 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1426 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1427 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1428 1429 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1435 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1436 { 1437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1438 IS iscol = a->col,isrow = a->row; 1439 PetscErrorCode ierr; 1440 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1441 PetscInt i,n = A->rmap->n,j; 1442 PetscInt nz; 1443 PetscScalar *x,*tmp,s1; 1444 const MatScalar *aa = a->a,*v; 1445 const PetscScalar *b; 1446 1447 PetscFunctionBegin; 1448 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1449 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1450 tmp = a->solve_work; 1451 1452 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1453 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1454 1455 /* copy the b into temp work space according to permutation */ 1456 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1457 1458 /* forward solve the U^T */ 1459 for (i=0; i<n; i++) { 1460 v = aa + adiag[i+1] + 1; 1461 vi = aj + adiag[i+1] + 1; 1462 nz = adiag[i] - adiag[i+1] - 1; 1463 s1 = tmp[i]; 1464 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1465 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1466 tmp[i] = s1; 1467 } 1468 1469 /* backward solve the L^T */ 1470 for (i=n-1; i>=0; i--) { 1471 v = aa + ai[i]; 1472 vi = aj + ai[i]; 1473 nz = ai[i+1] - ai[i]; 1474 s1 = tmp[i]; 1475 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1476 } 1477 1478 /* copy tmp into x according to permutation */ 1479 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1480 1481 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1482 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1483 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1484 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1485 1486 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1487 PetscFunctionReturn(0); 1488 } 1489 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1492 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1493 { 1494 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1495 IS iscol = a->col,isrow = a->row; 1496 PetscErrorCode ierr; 1497 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1498 PetscInt i,n = A->rmap->n,j; 1499 PetscInt nz; 1500 PetscScalar *x,*tmp,s1; 1501 const MatScalar *aa = a->a,*v; 1502 const PetscScalar *b; 1503 1504 PetscFunctionBegin; 1505 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1506 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1507 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1508 tmp = a->solve_work; 1509 1510 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1511 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1512 1513 /* copy the b into temp work space according to permutation */ 1514 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1515 1516 /* forward solve the U^T */ 1517 for (i=0; i<n; i++) { 1518 v = aa + diag[i]; 1519 vi = aj + diag[i] + 1; 1520 nz = ai[i+1] - diag[i] - 1; 1521 s1 = tmp[i]; 1522 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1523 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1524 tmp[i] = s1; 1525 } 1526 1527 /* backward solve the L^T */ 1528 for (i=n-1; i>=0; i--) { 1529 v = aa + diag[i] - 1; 1530 vi = aj + diag[i] - 1; 1531 nz = diag[i] - ai[i]; 1532 s1 = tmp[i]; 1533 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1534 } 1535 1536 /* copy tmp into x according to permutation */ 1537 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1538 1539 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1540 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1541 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1543 1544 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1550 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1551 { 1552 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1553 IS iscol = a->col,isrow = a->row; 1554 PetscErrorCode ierr; 1555 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1556 PetscInt i,n = A->rmap->n,j; 1557 PetscInt nz; 1558 PetscScalar *x,*tmp,s1; 1559 const MatScalar *aa = a->a,*v; 1560 const PetscScalar *b; 1561 1562 PetscFunctionBegin; 1563 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1564 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1565 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1566 tmp = a->solve_work; 1567 1568 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1569 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1570 1571 /* copy the b into temp work space according to permutation */ 1572 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1573 1574 /* forward solve the U^T */ 1575 for (i=0; i<n; i++) { 1576 v = aa + adiag[i+1] + 1; 1577 vi = aj + adiag[i+1] + 1; 1578 nz = adiag[i] - adiag[i+1] - 1; 1579 s1 = tmp[i]; 1580 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1581 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1582 tmp[i] = s1; 1583 } 1584 1585 1586 /* backward solve the L^T */ 1587 for (i=n-1; i>=0; i--) { 1588 v = aa + ai[i]; 1589 vi = aj + ai[i]; 1590 nz = ai[i+1] - ai[i]; 1591 s1 = tmp[i]; 1592 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1593 } 1594 1595 /* copy tmp into x according to permutation */ 1596 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1597 1598 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1599 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1600 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1601 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1602 1603 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1604 PetscFunctionReturn(0); 1605 } 1606 1607 /* ----------------------------------------------------------------*/ 1608 1609 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1610 1611 /* 1612 ilu() under revised new data structure. 1613 Factored arrays bj and ba are stored as 1614 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1615 1616 bi=fact->i is an array of size n+1, in which 1617 bi+ 1618 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1619 bi[n]: points to L(n-1,n-1)+1 1620 1621 bdiag=fact->diag is an array of size n+1,in which 1622 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1623 bdiag[n]: points to entry of U(n-1,0)-1 1624 1625 U(i,:) contains bdiag[i] as its last entry, i.e., 1626 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1627 */ 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1630 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1631 { 1632 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1633 PetscErrorCode ierr; 1634 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1635 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1636 IS isicol; 1637 1638 PetscFunctionBegin; 1639 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1640 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1641 b = (Mat_SeqAIJ*)(fact)->data; 1642 1643 /* allocate matrix arrays for new data structure */ 1644 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1645 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1646 1647 b->singlemalloc = PETSC_TRUE; 1648 if (!b->diag) { 1649 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 1650 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1651 } 1652 bdiag = b->diag; 1653 1654 if (n > 0) { 1655 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1656 } 1657 1658 /* set bi and bj with new data structure */ 1659 bi = b->i; 1660 bj = b->j; 1661 1662 /* L part */ 1663 bi[0] = 0; 1664 for (i=0; i<n; i++) { 1665 nz = adiag[i] - ai[i]; 1666 bi[i+1] = bi[i] + nz; 1667 aj = a->j + ai[i]; 1668 for (j=0; j<nz; j++) { 1669 /* *bj = aj[j]; bj++; */ 1670 bj[k++] = aj[j]; 1671 } 1672 } 1673 1674 /* U part */ 1675 bdiag[n] = bi[n]-1; 1676 for (i=n-1; i>=0; i--) { 1677 nz = ai[i+1] - adiag[i] - 1; 1678 aj = a->j + adiag[i] + 1; 1679 for (j=0; j<nz; j++) { 1680 /* *bj = aj[j]; bj++; */ 1681 bj[k++] = aj[j]; 1682 } 1683 /* diag[i] */ 1684 /* *bj = i; bj++; */ 1685 bj[k++] = i; 1686 bdiag[i] = bdiag[i+1] + nz + 1; 1687 } 1688 1689 fact->factortype = MAT_FACTOR_ILU; 1690 fact->info.factor_mallocs = 0; 1691 fact->info.fill_ratio_given = info->fill; 1692 fact->info.fill_ratio_needed = 1.0; 1693 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1694 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1695 1696 b = (Mat_SeqAIJ*)(fact)->data; 1697 b->row = isrow; 1698 b->col = iscol; 1699 b->icol = isicol; 1700 ierr = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1701 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1702 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1708 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1709 { 1710 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1711 IS isicol; 1712 PetscErrorCode ierr; 1713 const PetscInt *r,*ic; 1714 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1715 PetscInt *bi,*cols,nnz,*cols_lvl; 1716 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1717 PetscInt i,levels,diagonal_fill; 1718 PetscBool col_identity,row_identity,missing; 1719 PetscReal f; 1720 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1721 PetscBT lnkbt; 1722 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1723 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1724 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1725 1726 PetscFunctionBegin; 1727 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1728 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1729 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1730 1731 levels = (PetscInt)info->levels; 1732 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1733 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1734 if (!levels && row_identity && col_identity) { 1735 /* special case: ilu(0) with natural ordering */ 1736 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1737 if (a->inode.size) { 1738 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1739 } 1740 PetscFunctionReturn(0); 1741 } 1742 1743 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1744 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1745 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1746 1747 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1748 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1749 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1750 bi[0] = bdiag[0] = 0; 1751 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1752 1753 /* create a linked list for storing column indices of the active row */ 1754 nlnk = n + 1; 1755 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1756 1757 /* initial FreeSpace size is f*(ai[n]+1) */ 1758 f = info->fill; 1759 diagonal_fill = (PetscInt)info->diagonal_fill; 1760 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1761 current_space = free_space; 1762 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1763 current_space_lvl = free_space_lvl; 1764 for (i=0; i<n; i++) { 1765 nzi = 0; 1766 /* copy current row into linked list */ 1767 nnz = ai[r[i]+1] - ai[r[i]]; 1768 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1769 cols = aj + ai[r[i]]; 1770 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1771 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1772 nzi += nlnk; 1773 1774 /* make sure diagonal entry is included */ 1775 if (diagonal_fill && lnk[i] == -1) { 1776 fm = n; 1777 while (lnk[fm] < i) fm = lnk[fm]; 1778 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1779 lnk[fm] = i; 1780 lnk_lvl[i] = 0; 1781 nzi++; dcount++; 1782 } 1783 1784 /* add pivot rows into the active row */ 1785 nzbd = 0; 1786 prow = lnk[n]; 1787 while (prow < i) { 1788 nnz = bdiag[prow]; 1789 cols = bj_ptr[prow] + nnz + 1; 1790 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1791 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1792 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1793 nzi += nlnk; 1794 prow = lnk[prow]; 1795 nzbd++; 1796 } 1797 bdiag[i] = nzbd; 1798 bi[i+1] = bi[i] + nzi; 1799 /* if free space is not available, make more free space */ 1800 if (current_space->local_remaining<nzi) { 1801 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1802 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1803 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1804 reallocs++; 1805 } 1806 1807 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1808 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1809 bj_ptr[i] = current_space->array; 1810 bjlvl_ptr[i] = current_space_lvl->array; 1811 1812 /* make sure the active row i has diagonal entry */ 1813 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1814 1815 current_space->array += nzi; 1816 current_space->local_used += nzi; 1817 current_space->local_remaining -= nzi; 1818 current_space_lvl->array += nzi; 1819 current_space_lvl->local_used += nzi; 1820 current_space_lvl->local_remaining -= nzi; 1821 } 1822 1823 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1824 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1825 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1826 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1827 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1828 1829 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1830 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1831 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1832 1833 #if defined(PETSC_USE_INFO) 1834 { 1835 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1836 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1837 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1838 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1839 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1840 if (diagonal_fill) { 1841 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1842 } 1843 } 1844 #endif 1845 /* put together the new matrix */ 1846 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1847 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1848 b = (Mat_SeqAIJ*)(fact)->data; 1849 1850 b->free_a = PETSC_TRUE; 1851 b->free_ij = PETSC_TRUE; 1852 b->singlemalloc = PETSC_FALSE; 1853 1854 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 1855 1856 b->j = bj; 1857 b->i = bi; 1858 b->diag = bdiag; 1859 b->ilen = 0; 1860 b->imax = 0; 1861 b->row = isrow; 1862 b->col = iscol; 1863 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1864 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1865 b->icol = isicol; 1866 1867 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 1868 /* In b structure: Free imax, ilen, old a, old j. 1869 Allocate bdiag, solve_work, new a, new j */ 1870 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1871 b->maxnz = b->nz = bdiag[0]+1; 1872 1873 (fact)->info.factor_mallocs = reallocs; 1874 (fact)->info.fill_ratio_given = f; 1875 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1876 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1877 if (a->inode.size) { 1878 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1879 } 1880 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1886 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1887 { 1888 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1889 IS isicol; 1890 PetscErrorCode ierr; 1891 const PetscInt *r,*ic; 1892 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1893 PetscInt *bi,*cols,nnz,*cols_lvl; 1894 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1895 PetscInt i,levels,diagonal_fill; 1896 PetscBool col_identity,row_identity; 1897 PetscReal f; 1898 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1899 PetscBT lnkbt; 1900 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1901 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1902 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1903 PetscBool missing; 1904 1905 PetscFunctionBegin; 1906 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1907 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1908 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1909 1910 f = info->fill; 1911 levels = (PetscInt)info->levels; 1912 diagonal_fill = (PetscInt)info->diagonal_fill; 1913 1914 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1915 1916 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1917 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1918 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1919 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1920 1921 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1922 if (a->inode.size) { 1923 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1924 } 1925 fact->factortype = MAT_FACTOR_ILU; 1926 (fact)->info.factor_mallocs = 0; 1927 (fact)->info.fill_ratio_given = info->fill; 1928 (fact)->info.fill_ratio_needed = 1.0; 1929 1930 b = (Mat_SeqAIJ*)(fact)->data; 1931 b->row = isrow; 1932 b->col = iscol; 1933 b->icol = isicol; 1934 ierr = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1935 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1936 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1937 PetscFunctionReturn(0); 1938 } 1939 1940 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1941 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1942 1943 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1944 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1945 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1946 bi[0] = bdiag[0] = 0; 1947 1948 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1949 1950 /* create a linked list for storing column indices of the active row */ 1951 nlnk = n + 1; 1952 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1953 1954 /* initial FreeSpace size is f*(ai[n]+1) */ 1955 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1956 current_space = free_space; 1957 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1958 current_space_lvl = free_space_lvl; 1959 1960 for (i=0; i<n; i++) { 1961 nzi = 0; 1962 /* copy current row into linked list */ 1963 nnz = ai[r[i]+1] - ai[r[i]]; 1964 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1965 cols = aj + ai[r[i]]; 1966 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1967 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1968 nzi += nlnk; 1969 1970 /* make sure diagonal entry is included */ 1971 if (diagonal_fill && lnk[i] == -1) { 1972 fm = n; 1973 while (lnk[fm] < i) fm = lnk[fm]; 1974 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1975 lnk[fm] = i; 1976 lnk_lvl[i] = 0; 1977 nzi++; dcount++; 1978 } 1979 1980 /* add pivot rows into the active row */ 1981 nzbd = 0; 1982 prow = lnk[n]; 1983 while (prow < i) { 1984 nnz = bdiag[prow]; 1985 cols = bj_ptr[prow] + nnz + 1; 1986 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1987 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1988 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1989 nzi += nlnk; 1990 prow = lnk[prow]; 1991 nzbd++; 1992 } 1993 bdiag[i] = nzbd; 1994 bi[i+1] = bi[i] + nzi; 1995 1996 /* if free space is not available, make more free space */ 1997 if (current_space->local_remaining<nzi) { 1998 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1999 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2000 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2001 reallocs++; 2002 } 2003 2004 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2005 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2006 bj_ptr[i] = current_space->array; 2007 bjlvl_ptr[i] = current_space_lvl->array; 2008 2009 /* make sure the active row i has diagonal entry */ 2010 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2011 2012 current_space->array += nzi; 2013 current_space->local_used += nzi; 2014 current_space->local_remaining -= nzi; 2015 current_space_lvl->array += nzi; 2016 current_space_lvl->local_used += nzi; 2017 current_space_lvl->local_remaining -= nzi; 2018 } 2019 2020 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2021 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2022 2023 /* destroy list of free space and other temporary arrays */ 2024 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 2025 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2026 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2027 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2028 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2029 2030 #if defined(PETSC_USE_INFO) 2031 { 2032 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2033 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2034 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2035 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2036 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2037 if (diagonal_fill) { 2038 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 2039 } 2040 } 2041 #endif 2042 2043 /* put together the new matrix */ 2044 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2045 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2046 b = (Mat_SeqAIJ*)(fact)->data; 2047 2048 b->free_a = PETSC_TRUE; 2049 b->free_ij = PETSC_TRUE; 2050 b->singlemalloc = PETSC_FALSE; 2051 2052 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2053 b->j = bj; 2054 b->i = bi; 2055 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2056 b->diag = bdiag; 2057 b->ilen = 0; 2058 b->imax = 0; 2059 b->row = isrow; 2060 b->col = iscol; 2061 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2062 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2063 b->icol = isicol; 2064 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 2065 /* In b structure: Free imax, ilen, old a, old j. 2066 Allocate bdiag, solve_work, new a, new j */ 2067 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2068 b->maxnz = b->nz = bi[n]; 2069 2070 (fact)->info.factor_mallocs = reallocs; 2071 (fact)->info.fill_ratio_given = f; 2072 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2073 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2074 if (a->inode.size) { 2075 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2076 } 2077 PetscFunctionReturn(0); 2078 } 2079 2080 #undef __FUNCT__ 2081 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2082 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2083 { 2084 Mat C = B; 2085 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2086 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2087 IS ip=b->row,iip = b->icol; 2088 PetscErrorCode ierr; 2089 const PetscInt *rip,*riip; 2090 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2091 PetscInt *ai=a->i,*aj=a->j; 2092 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2093 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2094 PetscBool perm_identity; 2095 FactorShiftCtx sctx; 2096 PetscReal rs; 2097 MatScalar d,*v; 2098 2099 PetscFunctionBegin; 2100 /* MatPivotSetUp(): initialize shift context sctx */ 2101 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2102 2103 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2104 sctx.shift_top = info->zeropivot; 2105 for (i=0; i<mbs; i++) { 2106 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2107 d = (aa)[a->diag[i]]; 2108 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2109 v = aa+ai[i]; 2110 nz = ai[i+1] - ai[i]; 2111 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2112 if (rs>sctx.shift_top) sctx.shift_top = rs; 2113 } 2114 sctx.shift_top *= 1.1; 2115 sctx.nshift_max = 5; 2116 sctx.shift_lo = 0.; 2117 sctx.shift_hi = 1.; 2118 } 2119 2120 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2121 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2122 2123 /* allocate working arrays 2124 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2125 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2126 */ 2127 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2128 2129 do { 2130 sctx.newshift = PETSC_FALSE; 2131 2132 for (i=0; i<mbs; i++) c2r[i] = mbs; 2133 if (mbs) il[0] = 0; 2134 2135 for (k = 0; k<mbs; k++) { 2136 /* zero rtmp */ 2137 nz = bi[k+1] - bi[k]; 2138 bjtmp = bj + bi[k]; 2139 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2140 2141 /* load in initial unfactored row */ 2142 bval = ba + bi[k]; 2143 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2144 for (j = jmin; j < jmax; j++) { 2145 col = riip[aj[j]]; 2146 if (col >= k) { /* only take upper triangular entry */ 2147 rtmp[col] = aa[j]; 2148 *bval++ = 0.0; /* for in-place factorization */ 2149 } 2150 } 2151 /* shift the diagonal of the matrix: ZeropivotApply() */ 2152 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2153 2154 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2155 dk = rtmp[k]; 2156 i = c2r[k]; /* first row to be added to k_th row */ 2157 2158 while (i < k) { 2159 nexti = c2r[i]; /* next row to be added to k_th row */ 2160 2161 /* compute multiplier, update diag(k) and U(i,k) */ 2162 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2163 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2164 dk += uikdi*ba[ili]; /* update diag[k] */ 2165 ba[ili] = uikdi; /* -U(i,k) */ 2166 2167 /* add multiple of row i to k-th row */ 2168 jmin = ili + 1; jmax = bi[i+1]; 2169 if (jmin < jmax) { 2170 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2171 /* update il and c2r for row i */ 2172 il[i] = jmin; 2173 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2174 } 2175 i = nexti; 2176 } 2177 2178 /* copy data into U(k,:) */ 2179 rs = 0.0; 2180 jmin = bi[k]; jmax = bi[k+1]-1; 2181 if (jmin < jmax) { 2182 for (j=jmin; j<jmax; j++) { 2183 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2184 } 2185 /* add the k-th row into il and c2r */ 2186 il[k] = jmin; 2187 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2188 } 2189 2190 /* MatPivotCheck() */ 2191 sctx.rs = rs; 2192 sctx.pv = dk; 2193 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2194 if (sctx.newshift) break; 2195 dk = sctx.pv; 2196 2197 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2198 } 2199 } while (sctx.newshift); 2200 2201 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2202 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2203 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2204 2205 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2206 if (perm_identity) { 2207 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2208 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2209 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2210 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2211 } else { 2212 B->ops->solve = MatSolve_SeqSBAIJ_1; 2213 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2214 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2215 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2216 } 2217 2218 C->assembled = PETSC_TRUE; 2219 C->preallocated = PETSC_TRUE; 2220 2221 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2222 2223 /* MatPivotView() */ 2224 if (sctx.nshift) { 2225 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2226 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2227 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2228 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2229 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2230 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2231 } 2232 } 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2238 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2239 { 2240 Mat C = B; 2241 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2242 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2243 IS ip=b->row,iip = b->icol; 2244 PetscErrorCode ierr; 2245 const PetscInt *rip,*riip; 2246 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2247 PetscInt *ai=a->i,*aj=a->j; 2248 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2249 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2250 PetscBool perm_identity; 2251 FactorShiftCtx sctx; 2252 PetscReal rs; 2253 MatScalar d,*v; 2254 2255 PetscFunctionBegin; 2256 /* MatPivotSetUp(): initialize shift context sctx */ 2257 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2258 2259 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2260 sctx.shift_top = info->zeropivot; 2261 for (i=0; i<mbs; i++) { 2262 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2263 d = (aa)[a->diag[i]]; 2264 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2265 v = aa+ai[i]; 2266 nz = ai[i+1] - ai[i]; 2267 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2268 if (rs>sctx.shift_top) sctx.shift_top = rs; 2269 } 2270 sctx.shift_top *= 1.1; 2271 sctx.nshift_max = 5; 2272 sctx.shift_lo = 0.; 2273 sctx.shift_hi = 1.; 2274 } 2275 2276 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2277 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2278 2279 /* initialization */ 2280 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2281 2282 do { 2283 sctx.newshift = PETSC_FALSE; 2284 2285 for (i=0; i<mbs; i++) jl[i] = mbs; 2286 il[0] = 0; 2287 2288 for (k = 0; k<mbs; k++) { 2289 /* zero rtmp */ 2290 nz = bi[k+1] - bi[k]; 2291 bjtmp = bj + bi[k]; 2292 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2293 2294 bval = ba + bi[k]; 2295 /* initialize k-th row by the perm[k]-th row of A */ 2296 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2297 for (j = jmin; j < jmax; j++) { 2298 col = riip[aj[j]]; 2299 if (col >= k) { /* only take upper triangular entry */ 2300 rtmp[col] = aa[j]; 2301 *bval++ = 0.0; /* for in-place factorization */ 2302 } 2303 } 2304 /* shift the diagonal of the matrix */ 2305 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2306 2307 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2308 dk = rtmp[k]; 2309 i = jl[k]; /* first row to be added to k_th row */ 2310 2311 while (i < k) { 2312 nexti = jl[i]; /* next row to be added to k_th row */ 2313 2314 /* compute multiplier, update diag(k) and U(i,k) */ 2315 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2316 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2317 dk += uikdi*ba[ili]; 2318 ba[ili] = uikdi; /* -U(i,k) */ 2319 2320 /* add multiple of row i to k-th row */ 2321 jmin = ili + 1; jmax = bi[i+1]; 2322 if (jmin < jmax) { 2323 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2324 /* update il and jl for row i */ 2325 il[i] = jmin; 2326 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2327 } 2328 i = nexti; 2329 } 2330 2331 /* shift the diagonals when zero pivot is detected */ 2332 /* compute rs=sum of abs(off-diagonal) */ 2333 rs = 0.0; 2334 jmin = bi[k]+1; 2335 nz = bi[k+1] - jmin; 2336 bcol = bj + jmin; 2337 for (j=0; j<nz; j++) { 2338 rs += PetscAbsScalar(rtmp[bcol[j]]); 2339 } 2340 2341 sctx.rs = rs; 2342 sctx.pv = dk; 2343 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2344 if (sctx.newshift) break; 2345 dk = sctx.pv; 2346 2347 /* copy data into U(k,:) */ 2348 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2349 jmin = bi[k]+1; jmax = bi[k+1]; 2350 if (jmin < jmax) { 2351 for (j=jmin; j<jmax; j++) { 2352 col = bj[j]; ba[j] = rtmp[col]; 2353 } 2354 /* add the k-th row into il and jl */ 2355 il[k] = jmin; 2356 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2357 } 2358 } 2359 } while (sctx.newshift); 2360 2361 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2362 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2363 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2364 2365 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2366 if (perm_identity) { 2367 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2369 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2370 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2371 } else { 2372 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2373 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2374 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2375 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2376 } 2377 2378 C->assembled = PETSC_TRUE; 2379 C->preallocated = PETSC_TRUE; 2380 2381 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2382 if (sctx.nshift) { 2383 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2384 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2385 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2386 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2387 } 2388 } 2389 PetscFunctionReturn(0); 2390 } 2391 2392 /* 2393 icc() under revised new data structure. 2394 Factored arrays bj and ba are stored as 2395 U(0,:),...,U(i,:),U(n-1,:) 2396 2397 ui=fact->i is an array of size n+1, in which 2398 ui+ 2399 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2400 ui[n]: points to U(n-1,n-1)+1 2401 2402 udiag=fact->diag is an array of size n,in which 2403 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2404 2405 U(i,:) contains udiag[i] as its last entry, i.e., 2406 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2407 */ 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2411 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2412 { 2413 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2414 Mat_SeqSBAIJ *b; 2415 PetscErrorCode ierr; 2416 PetscBool perm_identity,missing; 2417 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2418 const PetscInt *rip,*riip; 2419 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2420 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2421 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2422 PetscReal fill =info->fill,levels=info->levels; 2423 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2424 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2425 PetscBT lnkbt; 2426 IS iperm; 2427 2428 PetscFunctionBegin; 2429 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2430 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2431 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2432 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2433 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2434 2435 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2436 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2437 ui[0] = 0; 2438 2439 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2440 if (!levels && perm_identity) { 2441 for (i=0; i<am; i++) { 2442 ncols = ai[i+1] - a->diag[i]; 2443 ui[i+1] = ui[i] + ncols; 2444 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2445 } 2446 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2447 cols = uj; 2448 for (i=0; i<am; i++) { 2449 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2450 ncols = ai[i+1] - a->diag[i] -1; 2451 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2452 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2453 } 2454 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2455 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2456 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2457 2458 /* initialization */ 2459 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2460 2461 /* jl: linked list for storing indices of the pivot rows 2462 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2463 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2464 for (i=0; i<am; i++) { 2465 jl[i] = am; il[i] = 0; 2466 } 2467 2468 /* create and initialize a linked list for storing column indices of the active row k */ 2469 nlnk = am + 1; 2470 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2471 2472 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2473 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2474 current_space = free_space; 2475 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2476 current_space_lvl = free_space_lvl; 2477 2478 for (k=0; k<am; k++) { /* for each active row k */ 2479 /* initialize lnk by the column indices of row rip[k] of A */ 2480 nzk = 0; 2481 ncols = ai[rip[k]+1] - ai[rip[k]]; 2482 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2483 ncols_upper = 0; 2484 for (j=0; j<ncols; j++) { 2485 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2486 if (riip[i] >= k) { /* only take upper triangular entry */ 2487 ajtmp[ncols_upper] = i; 2488 ncols_upper++; 2489 } 2490 } 2491 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2492 nzk += nlnk; 2493 2494 /* update lnk by computing fill-in for each pivot row to be merged in */ 2495 prow = jl[k]; /* 1st pivot row */ 2496 2497 while (prow < k) { 2498 nextprow = jl[prow]; 2499 2500 /* merge prow into k-th row */ 2501 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2502 jmax = ui[prow+1]; 2503 ncols = jmax-jmin; 2504 i = jmin - ui[prow]; 2505 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2506 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2507 j = *(uj - 1); 2508 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2509 nzk += nlnk; 2510 2511 /* update il and jl for prow */ 2512 if (jmin < jmax) { 2513 il[prow] = jmin; 2514 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2515 } 2516 prow = nextprow; 2517 } 2518 2519 /* if free space is not available, make more free space */ 2520 if (current_space->local_remaining<nzk) { 2521 i = am - k + 1; /* num of unfactored rows */ 2522 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2523 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2524 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2525 reallocs++; 2526 } 2527 2528 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2529 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2530 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2531 2532 /* add the k-th row into il and jl */ 2533 if (nzk > 1) { 2534 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2535 jl[k] = jl[i]; jl[i] = k; 2536 il[k] = ui[k] + 1; 2537 } 2538 uj_ptr[k] = current_space->array; 2539 uj_lvl_ptr[k] = current_space_lvl->array; 2540 2541 current_space->array += nzk; 2542 current_space->local_used += nzk; 2543 current_space->local_remaining -= nzk; 2544 2545 current_space_lvl->array += nzk; 2546 current_space_lvl->local_used += nzk; 2547 current_space_lvl->local_remaining -= nzk; 2548 2549 ui[k+1] = ui[k] + nzk; 2550 } 2551 2552 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2553 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2554 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2555 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2556 2557 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2558 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2559 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2560 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2561 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2562 2563 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2564 2565 /* put together the new matrix in MATSEQSBAIJ format */ 2566 b = (Mat_SeqSBAIJ*)(fact)->data; 2567 b->singlemalloc = PETSC_FALSE; 2568 2569 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2570 2571 b->j = uj; 2572 b->i = ui; 2573 b->diag = udiag; 2574 b->free_diag = PETSC_TRUE; 2575 b->ilen = 0; 2576 b->imax = 0; 2577 b->row = perm; 2578 b->col = perm; 2579 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2580 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2581 b->icol = iperm; 2582 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2583 2584 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2585 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2586 2587 b->maxnz = b->nz = ui[am]; 2588 b->free_a = PETSC_TRUE; 2589 b->free_ij = PETSC_TRUE; 2590 2591 fact->info.factor_mallocs = reallocs; 2592 fact->info.fill_ratio_given = fill; 2593 if (ai[am] != 0) { 2594 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2595 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2596 } else { 2597 fact->info.fill_ratio_needed = 0.0; 2598 } 2599 #if defined(PETSC_USE_INFO) 2600 if (ai[am] != 0) { 2601 PetscReal af = fact->info.fill_ratio_needed; 2602 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2603 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2604 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2605 } else { 2606 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2607 } 2608 #endif 2609 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2615 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2616 { 2617 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2618 Mat_SeqSBAIJ *b; 2619 PetscErrorCode ierr; 2620 PetscBool perm_identity,missing; 2621 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2622 const PetscInt *rip,*riip; 2623 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2624 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2625 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2626 PetscReal fill =info->fill,levels=info->levels; 2627 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2628 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2629 PetscBT lnkbt; 2630 IS iperm; 2631 2632 PetscFunctionBegin; 2633 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2634 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2635 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2636 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2637 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2638 2639 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2640 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2641 ui[0] = 0; 2642 2643 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2644 if (!levels && perm_identity) { 2645 2646 for (i=0; i<am; i++) { 2647 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2648 udiag[i] = ui[i]; 2649 } 2650 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2651 cols = uj; 2652 for (i=0; i<am; i++) { 2653 aj = a->j + a->diag[i]; 2654 ncols = ui[i+1] - ui[i]; 2655 for (j=0; j<ncols; j++) *cols++ = *aj++; 2656 } 2657 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2658 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2659 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2660 2661 /* initialization */ 2662 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2663 2664 /* jl: linked list for storing indices of the pivot rows 2665 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2666 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2667 for (i=0; i<am; i++) { 2668 jl[i] = am; il[i] = 0; 2669 } 2670 2671 /* create and initialize a linked list for storing column indices of the active row k */ 2672 nlnk = am + 1; 2673 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2674 2675 /* initial FreeSpace size is fill*(ai[am]+1) */ 2676 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2677 current_space = free_space; 2678 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2679 current_space_lvl = free_space_lvl; 2680 2681 for (k=0; k<am; k++) { /* for each active row k */ 2682 /* initialize lnk by the column indices of row rip[k] of A */ 2683 nzk = 0; 2684 ncols = ai[rip[k]+1] - ai[rip[k]]; 2685 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2686 ncols_upper = 0; 2687 for (j=0; j<ncols; j++) { 2688 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2689 if (riip[i] >= k) { /* only take upper triangular entry */ 2690 ajtmp[ncols_upper] = i; 2691 ncols_upper++; 2692 } 2693 } 2694 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2695 nzk += nlnk; 2696 2697 /* update lnk by computing fill-in for each pivot row to be merged in */ 2698 prow = jl[k]; /* 1st pivot row */ 2699 2700 while (prow < k) { 2701 nextprow = jl[prow]; 2702 2703 /* merge prow into k-th row */ 2704 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2705 jmax = ui[prow+1]; 2706 ncols = jmax-jmin; 2707 i = jmin - ui[prow]; 2708 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2709 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2710 j = *(uj - 1); 2711 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2712 nzk += nlnk; 2713 2714 /* update il and jl for prow */ 2715 if (jmin < jmax) { 2716 il[prow] = jmin; 2717 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2718 } 2719 prow = nextprow; 2720 } 2721 2722 /* if free space is not available, make more free space */ 2723 if (current_space->local_remaining<nzk) { 2724 i = am - k + 1; /* num of unfactored rows */ 2725 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2726 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2727 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2728 reallocs++; 2729 } 2730 2731 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2732 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2733 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2734 2735 /* add the k-th row into il and jl */ 2736 if (nzk > 1) { 2737 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2738 jl[k] = jl[i]; jl[i] = k; 2739 il[k] = ui[k] + 1; 2740 } 2741 uj_ptr[k] = current_space->array; 2742 uj_lvl_ptr[k] = current_space_lvl->array; 2743 2744 current_space->array += nzk; 2745 current_space->local_used += nzk; 2746 current_space->local_remaining -= nzk; 2747 2748 current_space_lvl->array += nzk; 2749 current_space_lvl->local_used += nzk; 2750 current_space_lvl->local_remaining -= nzk; 2751 2752 ui[k+1] = ui[k] + nzk; 2753 } 2754 2755 #if defined(PETSC_USE_INFO) 2756 if (ai[am] != 0) { 2757 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2758 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2759 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2760 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2761 } else { 2762 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2763 } 2764 #endif 2765 2766 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2767 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2768 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2769 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2770 2771 /* destroy list of free space and other temporary array(s) */ 2772 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2773 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2774 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2775 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2776 2777 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2778 2779 /* put together the new matrix in MATSEQSBAIJ format */ 2780 2781 b = (Mat_SeqSBAIJ*)fact->data; 2782 b->singlemalloc = PETSC_FALSE; 2783 2784 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2785 2786 b->j = uj; 2787 b->i = ui; 2788 b->diag = udiag; 2789 b->free_diag = PETSC_TRUE; 2790 b->ilen = 0; 2791 b->imax = 0; 2792 b->row = perm; 2793 b->col = perm; 2794 2795 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2796 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2797 2798 b->icol = iperm; 2799 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2800 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2801 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2802 b->maxnz = b->nz = ui[am]; 2803 b->free_a = PETSC_TRUE; 2804 b->free_ij = PETSC_TRUE; 2805 2806 fact->info.factor_mallocs = reallocs; 2807 fact->info.fill_ratio_given = fill; 2808 if (ai[am] != 0) { 2809 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2810 } else { 2811 fact->info.fill_ratio_needed = 0.0; 2812 } 2813 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2814 PetscFunctionReturn(0); 2815 } 2816 2817 #undef __FUNCT__ 2818 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2819 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2820 { 2821 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2822 Mat_SeqSBAIJ *b; 2823 PetscErrorCode ierr; 2824 PetscBool perm_identity,missing; 2825 PetscReal fill = info->fill; 2826 const PetscInt *rip,*riip; 2827 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2828 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2829 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2830 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2831 PetscBT lnkbt; 2832 IS iperm; 2833 2834 PetscFunctionBegin; 2835 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2836 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2837 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2838 2839 /* check whether perm is the identity mapping */ 2840 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2841 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2842 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2843 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2844 2845 /* initialization */ 2846 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2847 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2848 ui[0] = 0; 2849 2850 /* jl: linked list for storing indices of the pivot rows 2851 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2852 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2853 for (i=0; i<am; i++) { 2854 jl[i] = am; il[i] = 0; 2855 } 2856 2857 /* create and initialize a linked list for storing column indices of the active row k */ 2858 nlnk = am + 1; 2859 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2860 2861 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2862 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2863 current_space = free_space; 2864 2865 for (k=0; k<am; k++) { /* for each active row k */ 2866 /* initialize lnk by the column indices of row rip[k] of A */ 2867 nzk = 0; 2868 ncols = ai[rip[k]+1] - ai[rip[k]]; 2869 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2870 ncols_upper = 0; 2871 for (j=0; j<ncols; j++) { 2872 i = riip[*(aj + ai[rip[k]] + j)]; 2873 if (i >= k) { /* only take upper triangular entry */ 2874 cols[ncols_upper] = i; 2875 ncols_upper++; 2876 } 2877 } 2878 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2879 nzk += nlnk; 2880 2881 /* update lnk by computing fill-in for each pivot row to be merged in */ 2882 prow = jl[k]; /* 1st pivot row */ 2883 2884 while (prow < k) { 2885 nextprow = jl[prow]; 2886 /* merge prow into k-th row */ 2887 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2888 jmax = ui[prow+1]; 2889 ncols = jmax-jmin; 2890 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2891 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2892 nzk += nlnk; 2893 2894 /* update il and jl for prow */ 2895 if (jmin < jmax) { 2896 il[prow] = jmin; 2897 j = *uj_ptr; 2898 jl[prow] = jl[j]; 2899 jl[j] = prow; 2900 } 2901 prow = nextprow; 2902 } 2903 2904 /* if free space is not available, make more free space */ 2905 if (current_space->local_remaining<nzk) { 2906 i = am - k + 1; /* num of unfactored rows */ 2907 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2908 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2909 reallocs++; 2910 } 2911 2912 /* copy data into free space, then initialize lnk */ 2913 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2914 2915 /* add the k-th row into il and jl */ 2916 if (nzk > 1) { 2917 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2918 jl[k] = jl[i]; jl[i] = k; 2919 il[k] = ui[k] + 1; 2920 } 2921 ui_ptr[k] = current_space->array; 2922 2923 current_space->array += nzk; 2924 current_space->local_used += nzk; 2925 current_space->local_remaining -= nzk; 2926 2927 ui[k+1] = ui[k] + nzk; 2928 } 2929 2930 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2931 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2932 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2933 2934 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2935 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2936 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2937 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2938 2939 /* put together the new matrix in MATSEQSBAIJ format */ 2940 2941 b = (Mat_SeqSBAIJ*)fact->data; 2942 b->singlemalloc = PETSC_FALSE; 2943 b->free_a = PETSC_TRUE; 2944 b->free_ij = PETSC_TRUE; 2945 2946 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2947 2948 b->j = uj; 2949 b->i = ui; 2950 b->diag = udiag; 2951 b->free_diag = PETSC_TRUE; 2952 b->ilen = 0; 2953 b->imax = 0; 2954 b->row = perm; 2955 b->col = perm; 2956 2957 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2958 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2959 2960 b->icol = iperm; 2961 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2962 2963 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2964 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2965 2966 b->maxnz = b->nz = ui[am]; 2967 2968 fact->info.factor_mallocs = reallocs; 2969 fact->info.fill_ratio_given = fill; 2970 if (ai[am] != 0) { 2971 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2972 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2973 } else { 2974 fact->info.fill_ratio_needed = 0.0; 2975 } 2976 #if defined(PETSC_USE_INFO) 2977 if (ai[am] != 0) { 2978 PetscReal af = fact->info.fill_ratio_needed; 2979 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2980 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2981 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2982 } else { 2983 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2984 } 2985 #endif 2986 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2992 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2993 { 2994 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2995 Mat_SeqSBAIJ *b; 2996 PetscErrorCode ierr; 2997 PetscBool perm_identity,missing; 2998 PetscReal fill = info->fill; 2999 const PetscInt *rip,*riip; 3000 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3001 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3002 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3003 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3004 PetscBT lnkbt; 3005 IS iperm; 3006 3007 PetscFunctionBegin; 3008 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3009 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3010 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3011 3012 /* check whether perm is the identity mapping */ 3013 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3014 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3015 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3016 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3017 3018 /* initialization */ 3019 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 3020 ui[0] = 0; 3021 3022 /* jl: linked list for storing indices of the pivot rows 3023 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3024 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 3025 for (i=0; i<am; i++) { 3026 jl[i] = am; il[i] = 0; 3027 } 3028 3029 /* create and initialize a linked list for storing column indices of the active row k */ 3030 nlnk = am + 1; 3031 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3032 3033 /* initial FreeSpace size is fill*(ai[am]+1) */ 3034 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3035 current_space = free_space; 3036 3037 for (k=0; k<am; k++) { /* for each active row k */ 3038 /* initialize lnk by the column indices of row rip[k] of A */ 3039 nzk = 0; 3040 ncols = ai[rip[k]+1] - ai[rip[k]]; 3041 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3042 ncols_upper = 0; 3043 for (j=0; j<ncols; j++) { 3044 i = riip[*(aj + ai[rip[k]] + j)]; 3045 if (i >= k) { /* only take upper triangular entry */ 3046 cols[ncols_upper] = i; 3047 ncols_upper++; 3048 } 3049 } 3050 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3051 nzk += nlnk; 3052 3053 /* update lnk by computing fill-in for each pivot row to be merged in */ 3054 prow = jl[k]; /* 1st pivot row */ 3055 3056 while (prow < k) { 3057 nextprow = jl[prow]; 3058 /* merge prow into k-th row */ 3059 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3060 jmax = ui[prow+1]; 3061 ncols = jmax-jmin; 3062 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3063 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3064 nzk += nlnk; 3065 3066 /* update il and jl for prow */ 3067 if (jmin < jmax) { 3068 il[prow] = jmin; 3069 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3070 } 3071 prow = nextprow; 3072 } 3073 3074 /* if free space is not available, make more free space */ 3075 if (current_space->local_remaining<nzk) { 3076 i = am - k + 1; /* num of unfactored rows */ 3077 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3078 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3079 reallocs++; 3080 } 3081 3082 /* copy data into free space, then initialize lnk */ 3083 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3084 3085 /* add the k-th row into il and jl */ 3086 if (nzk-1 > 0) { 3087 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3088 jl[k] = jl[i]; jl[i] = k; 3089 il[k] = ui[k] + 1; 3090 } 3091 ui_ptr[k] = current_space->array; 3092 3093 current_space->array += nzk; 3094 current_space->local_used += nzk; 3095 current_space->local_remaining -= nzk; 3096 3097 ui[k+1] = ui[k] + nzk; 3098 } 3099 3100 #if defined(PETSC_USE_INFO) 3101 if (ai[am] != 0) { 3102 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3103 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3104 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3105 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3106 } else { 3107 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3108 } 3109 #endif 3110 3111 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3112 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3113 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3114 3115 /* destroy list of free space and other temporary array(s) */ 3116 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 3117 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3118 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3119 3120 /* put together the new matrix in MATSEQSBAIJ format */ 3121 3122 b = (Mat_SeqSBAIJ*)fact->data; 3123 b->singlemalloc = PETSC_FALSE; 3124 b->free_a = PETSC_TRUE; 3125 b->free_ij = PETSC_TRUE; 3126 3127 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 3128 3129 b->j = uj; 3130 b->i = ui; 3131 b->diag = 0; 3132 b->ilen = 0; 3133 b->imax = 0; 3134 b->row = perm; 3135 b->col = perm; 3136 3137 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3138 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3139 3140 b->icol = iperm; 3141 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3142 3143 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 3144 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3145 b->maxnz = b->nz = ui[am]; 3146 3147 fact->info.factor_mallocs = reallocs; 3148 fact->info.fill_ratio_given = fill; 3149 if (ai[am] != 0) { 3150 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3151 } else { 3152 fact->info.fill_ratio_needed = 0.0; 3153 } 3154 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3155 PetscFunctionReturn(0); 3156 } 3157 3158 #undef __FUNCT__ 3159 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3160 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3161 { 3162 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3163 PetscErrorCode ierr; 3164 PetscInt n = A->rmap->n; 3165 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3166 PetscScalar *x,sum; 3167 const PetscScalar *b; 3168 const MatScalar *aa = a->a,*v; 3169 PetscInt i,nz; 3170 3171 PetscFunctionBegin; 3172 if (!n) PetscFunctionReturn(0); 3173 3174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3175 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3176 3177 /* forward solve the lower triangular */ 3178 x[0] = b[0]; 3179 v = aa; 3180 vi = aj; 3181 for (i=1; i<n; i++) { 3182 nz = ai[i+1] - ai[i]; 3183 sum = b[i]; 3184 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3185 v += nz; 3186 vi += nz; 3187 x[i] = sum; 3188 } 3189 3190 /* backward solve the upper triangular */ 3191 for (i=n-1; i>=0; i--) { 3192 v = aa + adiag[i+1] + 1; 3193 vi = aj + adiag[i+1] + 1; 3194 nz = adiag[i] - adiag[i+1]-1; 3195 sum = x[i]; 3196 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3197 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3198 } 3199 3200 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3201 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3202 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "MatSolve_SeqAIJ" 3208 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3209 { 3210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3211 IS iscol = a->col,isrow = a->row; 3212 PetscErrorCode ierr; 3213 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3214 const PetscInt *rout,*cout,*r,*c; 3215 PetscScalar *x,*tmp,sum; 3216 const PetscScalar *b; 3217 const MatScalar *aa = a->a,*v; 3218 3219 PetscFunctionBegin; 3220 if (!n) PetscFunctionReturn(0); 3221 3222 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3223 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3224 tmp = a->solve_work; 3225 3226 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3227 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3228 3229 /* forward solve the lower triangular */ 3230 tmp[0] = b[r[0]]; 3231 v = aa; 3232 vi = aj; 3233 for (i=1; i<n; i++) { 3234 nz = ai[i+1] - ai[i]; 3235 sum = b[r[i]]; 3236 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3237 tmp[i] = sum; 3238 v += nz; vi += nz; 3239 } 3240 3241 /* backward solve the upper triangular */ 3242 for (i=n-1; i>=0; i--) { 3243 v = aa + adiag[i+1]+1; 3244 vi = aj + adiag[i+1]+1; 3245 nz = adiag[i]-adiag[i+1]-1; 3246 sum = tmp[i]; 3247 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3248 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3249 } 3250 3251 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3252 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3253 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3254 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3255 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3256 PetscFunctionReturn(0); 3257 } 3258 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3261 /* 3262 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3263 */ 3264 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3265 { 3266 Mat B = *fact; 3267 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3268 IS isicol; 3269 PetscErrorCode ierr; 3270 const PetscInt *r,*ic; 3271 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3272 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3273 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3274 PetscInt nlnk,*lnk; 3275 PetscBT lnkbt; 3276 PetscBool row_identity,icol_identity; 3277 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3278 const PetscInt *ics; 3279 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3280 PetscReal dt =info->dt,shift=info->shiftamount; 3281 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3282 PetscBool missing; 3283 3284 PetscFunctionBegin; 3285 if (dt == PETSC_DEFAULT) dt = 0.005; 3286 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3287 3288 /* ------- symbolic factorization, can be reused ---------*/ 3289 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3290 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3291 adiag=a->diag; 3292 3293 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3294 3295 /* bdiag is location of diagonal in factor */ 3296 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3297 ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3298 3299 /* allocate row pointers bi */ 3300 ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr); 3301 3302 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3303 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3304 nnz_max = ai[n]+2*n*dtcount+2; 3305 3306 ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr); 3307 ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr); 3308 3309 /* put together the new matrix */ 3310 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3312 b = (Mat_SeqAIJ*)B->data; 3313 3314 b->free_a = PETSC_TRUE; 3315 b->free_ij = PETSC_TRUE; 3316 b->singlemalloc = PETSC_FALSE; 3317 3318 b->a = ba; 3319 b->j = bj; 3320 b->i = bi; 3321 b->diag = bdiag; 3322 b->ilen = 0; 3323 b->imax = 0; 3324 b->row = isrow; 3325 b->col = iscol; 3326 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3327 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3328 b->icol = isicol; 3329 3330 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 3331 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3332 b->maxnz = nnz_max; 3333 3334 B->factortype = MAT_FACTOR_ILUDT; 3335 B->info.factor_mallocs = 0; 3336 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3337 /* ------- end of symbolic factorization ---------*/ 3338 3339 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3340 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3341 ics = ic; 3342 3343 /* linked list for storing column indices of the active row */ 3344 nlnk = n + 1; 3345 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3346 3347 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3348 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3349 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3350 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3351 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3352 3353 bi[0] = 0; 3354 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3355 bdiag_rev[n] = bdiag[0]; 3356 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3357 for (i=0; i<n; i++) { 3358 /* copy initial fill into linked list */ 3359 nzi = ai[r[i]+1] - ai[r[i]]; 3360 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3361 nzi_al = adiag[r[i]] - ai[r[i]]; 3362 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3363 ajtmp = aj + ai[r[i]]; 3364 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3365 3366 /* load in initial (unfactored row) */ 3367 aatmp = a->a + ai[r[i]]; 3368 for (j=0; j<nzi; j++) { 3369 rtmp[ics[*ajtmp++]] = *aatmp++; 3370 } 3371 3372 /* add pivot rows into linked list */ 3373 row = lnk[n]; 3374 while (row < i) { 3375 nzi_bl = bi[row+1] - bi[row] + 1; 3376 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3377 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3378 nzi += nlnk; 3379 row = lnk[row]; 3380 } 3381 3382 /* copy data from lnk into jtmp, then initialize lnk */ 3383 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3384 3385 /* numerical factorization */ 3386 bjtmp = jtmp; 3387 row = *bjtmp++; /* 1st pivot row */ 3388 while (row < i) { 3389 pc = rtmp + row; 3390 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3391 multiplier = (*pc) * (*pv); 3392 *pc = multiplier; 3393 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3394 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3395 pv = ba + bdiag[row+1] + 1; 3396 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3397 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3398 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3399 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3400 } 3401 row = *bjtmp++; 3402 } 3403 3404 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3405 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3406 nzi_bl = 0; j = 0; 3407 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3408 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3409 nzi_bl++; j++; 3410 } 3411 nzi_bu = nzi - nzi_bl -1; 3412 while (j < nzi) { 3413 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3414 j++; 3415 } 3416 3417 bjtmp = bj + bi[i]; 3418 batmp = ba + bi[i]; 3419 /* apply level dropping rule to L part */ 3420 ncut = nzi_al + dtcount; 3421 if (ncut < nzi_bl) { 3422 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3423 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3424 } else { 3425 ncut = nzi_bl; 3426 } 3427 for (j=0; j<ncut; j++) { 3428 bjtmp[j] = jtmp[j]; 3429 batmp[j] = vtmp[j]; 3430 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3431 } 3432 bi[i+1] = bi[i] + ncut; 3433 nzi = ncut + 1; 3434 3435 /* apply level dropping rule to U part */ 3436 ncut = nzi_au + dtcount; 3437 if (ncut < nzi_bu) { 3438 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3439 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3440 } else { 3441 ncut = nzi_bu; 3442 } 3443 nzi += ncut; 3444 3445 /* mark bdiagonal */ 3446 bdiag[i+1] = bdiag[i] - (ncut + 1); 3447 bdiag_rev[n-i-1] = bdiag[i+1]; 3448 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3449 bjtmp = bj + bdiag[i]; 3450 batmp = ba + bdiag[i]; 3451 *bjtmp = i; 3452 *batmp = diag_tmp; /* rtmp[i]; */ 3453 if (*batmp == 0.0) { 3454 *batmp = dt+shift; 3455 /* printf(" row %d add shift %g\n",i,shift); */ 3456 } 3457 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3458 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3459 3460 bjtmp = bj + bdiag[i+1]+1; 3461 batmp = ba + bdiag[i+1]+1; 3462 for (k=0; k<ncut; k++) { 3463 bjtmp[k] = jtmp[nzi_bl+1+k]; 3464 batmp[k] = vtmp[nzi_bl+1+k]; 3465 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3466 } 3467 /* printf("\n"); */ 3468 3469 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3470 /* 3471 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3472 printf(" ----------------------------\n"); 3473 */ 3474 } /* for (i=0; i<n; i++) */ 3475 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3476 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3477 3478 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3479 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3480 3481 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3482 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3483 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3484 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3485 3486 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3487 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3488 3489 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3490 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3491 if (row_identity && icol_identity) { 3492 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3493 } else { 3494 B->ops->solve = MatSolve_SeqAIJ; 3495 } 3496 3497 B->ops->solveadd = 0; 3498 B->ops->solvetranspose = 0; 3499 B->ops->solvetransposeadd = 0; 3500 B->ops->matsolve = 0; 3501 B->assembled = PETSC_TRUE; 3502 B->preallocated = PETSC_TRUE; 3503 PetscFunctionReturn(0); 3504 } 3505 3506 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3507 #undef __FUNCT__ 3508 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3509 /* 3510 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3511 */ 3512 3513 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3514 { 3515 PetscErrorCode ierr; 3516 3517 PetscFunctionBegin; 3518 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3519 PetscFunctionReturn(0); 3520 } 3521 3522 /* 3523 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3524 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3525 */ 3526 #undef __FUNCT__ 3527 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3528 /* 3529 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3530 */ 3531 3532 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3533 { 3534 Mat C =fact; 3535 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3536 IS isrow = b->row,isicol = b->icol; 3537 PetscErrorCode ierr; 3538 const PetscInt *r,*ic,*ics; 3539 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3540 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3541 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3542 PetscReal dt=info->dt,shift=info->shiftamount; 3543 PetscBool row_identity, col_identity; 3544 3545 PetscFunctionBegin; 3546 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3547 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3548 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 3549 ics = ic; 3550 3551 for (i=0; i<n; i++) { 3552 /* initialize rtmp array */ 3553 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3554 bjtmp = bj + bi[i]; 3555 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3556 rtmp[i] = 0.0; 3557 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3558 bjtmp = bj + bdiag[i+1] + 1; 3559 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3560 3561 /* load in initial unfactored row of A */ 3562 /* printf("row %d\n",i); */ 3563 nz = ai[r[i]+1] - ai[r[i]]; 3564 ajtmp = aj + ai[r[i]]; 3565 v = aa + ai[r[i]]; 3566 for (j=0; j<nz; j++) { 3567 rtmp[ics[*ajtmp++]] = v[j]; 3568 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3569 } 3570 /* printf("\n"); */ 3571 3572 /* numerical factorization */ 3573 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3574 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3575 k = 0; 3576 while (k < nzl) { 3577 row = *bjtmp++; 3578 /* printf(" prow %d\n",row); */ 3579 pc = rtmp + row; 3580 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3581 multiplier = (*pc) * (*pv); 3582 *pc = multiplier; 3583 if (PetscAbsScalar(multiplier) > dt) { 3584 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3585 pv = b->a + bdiag[row+1] + 1; 3586 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3587 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3588 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3589 } 3590 k++; 3591 } 3592 3593 /* finished row so stick it into b->a */ 3594 /* L-part */ 3595 pv = b->a + bi[i]; 3596 pj = bj + bi[i]; 3597 nzl = bi[i+1] - bi[i]; 3598 for (j=0; j<nzl; j++) { 3599 pv[j] = rtmp[pj[j]]; 3600 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3601 } 3602 3603 /* diagonal: invert diagonal entries for simplier triangular solves */ 3604 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3605 b->a[bdiag[i]] = 1.0/rtmp[i]; 3606 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3607 3608 /* U-part */ 3609 pv = b->a + bdiag[i+1] + 1; 3610 pj = bj + bdiag[i+1] + 1; 3611 nzu = bdiag[i] - bdiag[i+1] - 1; 3612 for (j=0; j<nzu; j++) { 3613 pv[j] = rtmp[pj[j]]; 3614 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3615 } 3616 /* printf("\n"); */ 3617 } 3618 3619 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3620 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3621 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3622 3623 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3624 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3625 if (row_identity && col_identity) { 3626 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3627 } else { 3628 C->ops->solve = MatSolve_SeqAIJ; 3629 } 3630 C->ops->solveadd = 0; 3631 C->ops->solvetranspose = 0; 3632 C->ops->solvetransposeadd = 0; 3633 C->ops->matsolve = 0; 3634 C->assembled = PETSC_TRUE; 3635 C->preallocated = PETSC_TRUE; 3636 3637 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3638 PetscFunctionReturn(0); 3639 } 3640