1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 9 /* 10 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 11 12 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 13 */ 14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscBool *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0.0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 31 else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 96 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 97 { 98 PetscInt n = A->rmap->n; 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 #if defined(PETSC_USE_COMPLEX) 103 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 104 #endif 105 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 106 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 107 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 108 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 109 110 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 111 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 112 113 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 114 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 115 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 116 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 117 118 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 119 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 120 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 121 (*B)->factortype = ftype; 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 127 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 128 { 129 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 130 IS isicol; 131 PetscErrorCode ierr; 132 const PetscInt *r,*ic; 133 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 134 PetscInt *bi,*bj,*ajtmp; 135 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 136 PetscReal f; 137 PetscInt nlnk,*lnk,k,**bi_ptr; 138 PetscFreeSpaceList free_space=NULL,current_space=NULL; 139 PetscBT lnkbt; 140 PetscBool missing; 141 142 PetscFunctionBegin; 143 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 144 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 145 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 146 147 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 148 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 149 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 150 151 /* get new row pointers */ 152 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 153 bi[0] = 0; 154 155 /* bdiag is location of diagonal in factor */ 156 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 157 bdiag[0] = 0; 158 159 /* linked list for storing column indices of the active row */ 160 nlnk = n + 1; 161 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 162 163 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 164 165 /* initial FreeSpace size is f*(ai[n]+1) */ 166 f = info->fill; 167 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 168 current_space = free_space; 169 170 for (i=0; i<n; i++) { 171 /* copy previous fill into linked list */ 172 nzi = 0; 173 nnz = ai[r[i]+1] - ai[r[i]]; 174 ajtmp = aj + ai[r[i]]; 175 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 176 nzi += nlnk; 177 178 /* add pivot rows into linked list */ 179 row = lnk[n]; 180 while (row < i) { 181 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 182 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 183 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 184 nzi += nlnk; 185 row = lnk[row]; 186 } 187 bi[i+1] = bi[i] + nzi; 188 im[i] = nzi; 189 190 /* mark bdiag */ 191 nzbd = 0; 192 nnz = nzi; 193 k = lnk[n]; 194 while (nnz-- && k < i) { 195 nzbd++; 196 k = lnk[k]; 197 } 198 bdiag[i] = bi[i] + nzbd; 199 200 /* if free space is not available, make more free space */ 201 if (current_space->local_remaining<nzi) { 202 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 203 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 204 reallocs++; 205 } 206 207 /* copy data into free space, then initialize lnk */ 208 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 209 210 bi_ptr[i] = current_space->array; 211 current_space->array += nzi; 212 current_space->local_used += nzi; 213 current_space->local_remaining -= nzi; 214 } 215 #if defined(PETSC_USE_INFO) 216 if (ai[n] != 0) { 217 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 218 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 219 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 220 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 221 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 222 } else { 223 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 224 } 225 #endif 226 227 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 228 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 229 230 /* destroy list of free space and other temporary array(s) */ 231 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 232 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 233 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 234 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 235 236 /* put together the new matrix */ 237 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 238 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 239 b = (Mat_SeqAIJ*)(B)->data; 240 241 b->free_a = PETSC_TRUE; 242 b->free_ij = PETSC_TRUE; 243 b->singlemalloc = PETSC_FALSE; 244 245 ierr = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr); 246 b->j = bj; 247 b->i = bi; 248 b->diag = bdiag; 249 b->ilen = 0; 250 b->imax = 0; 251 b->row = isrow; 252 b->col = iscol; 253 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 254 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 255 b->icol = isicol; 256 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 257 258 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 259 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 260 b->maxnz = b->nz = bi[n]; 261 262 (B)->factortype = MAT_FACTOR_LU; 263 (B)->info.factor_mallocs = reallocs; 264 (B)->info.fill_ratio_given = f; 265 266 if (ai[n]) { 267 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 268 } else { 269 (B)->info.fill_ratio_needed = 0.0; 270 } 271 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 272 if (a->inode.size) { 273 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 280 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 281 { 282 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 283 IS isicol; 284 PetscErrorCode ierr; 285 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 286 PetscInt i,n=A->rmap->n; 287 PetscInt *bi,*bj; 288 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 289 PetscReal f; 290 PetscInt nlnk,*lnk,k,**bi_ptr; 291 PetscFreeSpaceList free_space=NULL,current_space=NULL; 292 PetscBT lnkbt; 293 PetscBool missing; 294 295 PetscFunctionBegin; 296 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 297 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 298 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 299 300 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 301 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 302 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 303 304 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 305 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 306 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 307 bi[0] = bdiag[0] = 0; 308 309 /* linked list for storing column indices of the active row */ 310 nlnk = n + 1; 311 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 313 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 314 315 /* initial FreeSpace size is f*(ai[n]+1) */ 316 f = info->fill; 317 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 318 current_space = free_space; 319 320 for (i=0; i<n; i++) { 321 /* copy previous fill into linked list */ 322 nzi = 0; 323 nnz = ai[r[i]+1] - ai[r[i]]; 324 ajtmp = aj + ai[r[i]]; 325 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 326 nzi += nlnk; 327 328 /* add pivot rows into linked list */ 329 row = lnk[n]; 330 while (row < i) { 331 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 332 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 333 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 334 nzi += nlnk; 335 row = lnk[row]; 336 } 337 bi[i+1] = bi[i] + nzi; 338 im[i] = nzi; 339 340 /* mark bdiag */ 341 nzbd = 0; 342 nnz = nzi; 343 k = lnk[n]; 344 while (nnz-- && k < i) { 345 nzbd++; 346 k = lnk[k]; 347 } 348 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 349 350 /* if free space is not available, make more free space */ 351 if (current_space->local_remaining<nzi) { 352 /* estimated additional space needed */ 353 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 354 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 355 reallocs++; 356 } 357 358 /* copy data into free space, then initialize lnk */ 359 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 360 361 bi_ptr[i] = current_space->array; 362 current_space->array += nzi; 363 current_space->local_used += nzi; 364 current_space->local_remaining -= nzi; 365 } 366 367 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 368 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 369 370 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 371 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 372 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 374 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 375 376 /* put together the new matrix */ 377 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 378 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 379 b = (Mat_SeqAIJ*)(B)->data; 380 381 b->free_a = PETSC_TRUE; 382 b->free_ij = PETSC_TRUE; 383 b->singlemalloc = PETSC_FALSE; 384 385 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 386 387 b->j = bj; 388 b->i = bi; 389 b->diag = bdiag; 390 b->ilen = 0; 391 b->imax = 0; 392 b->row = isrow; 393 b->col = iscol; 394 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 395 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 396 b->icol = isicol; 397 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 398 399 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 400 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 401 b->maxnz = b->nz = bdiag[0]+1; 402 403 B->factortype = MAT_FACTOR_LU; 404 B->info.factor_mallocs = reallocs; 405 B->info.fill_ratio_given = f; 406 407 if (ai[n]) { 408 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 409 } else { 410 B->info.fill_ratio_needed = 0.0; 411 } 412 #if defined(PETSC_USE_INFO) 413 if (ai[n] != 0) { 414 PetscReal af = B->info.fill_ratio_needed; 415 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 416 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 417 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 418 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 419 } else { 420 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 421 } 422 #endif 423 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 424 if (a->inode.size) { 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 426 } 427 ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 /* 432 Trouble in factorization, should we dump the original matrix? 433 */ 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatFactorDumpMatrix" 436 PetscErrorCode MatFactorDumpMatrix(Mat A) 437 { 438 PetscErrorCode ierr; 439 PetscBool flg = PETSC_FALSE; 440 441 PetscFunctionBegin; 442 ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 443 if (flg) { 444 PetscViewer viewer; 445 char filename[PETSC_MAX_PATH_LEN]; 446 447 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 448 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 449 ierr = MatView(A,viewer);CHKERRQ(ierr); 450 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 451 } 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 457 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 458 { 459 Mat C =B; 460 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 461 IS isrow = b->row,isicol = b->icol; 462 PetscErrorCode ierr; 463 const PetscInt *r,*ic,*ics; 464 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 465 PetscInt i,j,k,nz,nzL,row,*pj; 466 const PetscInt *ajtmp,*bjtmp; 467 MatScalar *rtmp,*pc,multiplier,*pv; 468 const MatScalar *aa=a->a,*v; 469 PetscBool row_identity,col_identity; 470 FactorShiftCtx sctx; 471 const PetscInt *ddiag; 472 PetscReal rs; 473 MatScalar d; 474 475 PetscFunctionBegin; 476 /* MatPivotSetUp(): initialize shift context sctx */ 477 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 478 479 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 480 ddiag = a->diag; 481 sctx.shift_top = info->zeropivot; 482 for (i=0; i<n; i++) { 483 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 484 d = (aa)[ddiag[i]]; 485 rs = -PetscAbsScalar(d) - PetscRealPart(d); 486 v = aa+ai[i]; 487 nz = ai[i+1] - ai[i]; 488 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 489 if (rs>sctx.shift_top) sctx.shift_top = rs; 490 } 491 sctx.shift_top *= 1.1; 492 sctx.nshift_max = 5; 493 sctx.shift_lo = 0.; 494 sctx.shift_hi = 1.; 495 } 496 497 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 498 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 499 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 500 ics = ic; 501 502 do { 503 sctx.newshift = PETSC_FALSE; 504 for (i=0; i<n; i++) { 505 /* zero rtmp */ 506 /* L part */ 507 nz = bi[i+1] - bi[i]; 508 bjtmp = bj + bi[i]; 509 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 510 511 /* U part */ 512 nz = bdiag[i]-bdiag[i+1]; 513 bjtmp = bj + bdiag[i+1]+1; 514 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 515 516 /* load in initial (unfactored row) */ 517 nz = ai[r[i]+1] - ai[r[i]]; 518 ajtmp = aj + ai[r[i]]; 519 v = aa + ai[r[i]]; 520 for (j=0; j<nz; j++) { 521 rtmp[ics[ajtmp[j]]] = v[j]; 522 } 523 /* ZeropivotApply() */ 524 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 525 526 /* elimination */ 527 bjtmp = bj + bi[i]; 528 row = *bjtmp++; 529 nzL = bi[i+1] - bi[i]; 530 for (k=0; k < nzL; k++) { 531 pc = rtmp + row; 532 if (*pc != 0.0) { 533 pv = b->a + bdiag[row]; 534 multiplier = *pc * (*pv); 535 *pc = multiplier; 536 537 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 538 pv = b->a + bdiag[row+1]+1; 539 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 540 541 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 542 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 543 } 544 row = *bjtmp++; 545 } 546 547 /* finished row so stick it into b->a */ 548 rs = 0.0; 549 /* L part */ 550 pv = b->a + bi[i]; 551 pj = b->j + bi[i]; 552 nz = bi[i+1] - bi[i]; 553 for (j=0; j<nz; j++) { 554 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 555 } 556 557 /* U part */ 558 pv = b->a + bdiag[i+1]+1; 559 pj = b->j + bdiag[i+1]+1; 560 nz = bdiag[i] - bdiag[i+1]-1; 561 for (j=0; j<nz; j++) { 562 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 563 } 564 565 sctx.rs = rs; 566 sctx.pv = rtmp[i]; 567 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 568 if (sctx.newshift) break; /* break for-loop */ 569 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 570 571 /* Mark diagonal and invert diagonal for simplier triangular solves */ 572 pv = b->a + bdiag[i]; 573 *pv = 1.0/rtmp[i]; 574 575 } /* endof for (i=0; i<n; i++) { */ 576 577 /* MatPivotRefine() */ 578 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 579 /* 580 * if no shift in this attempt & shifting & started shifting & can refine, 581 * then try lower shift 582 */ 583 sctx.shift_hi = sctx.shift_fraction; 584 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 585 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 586 sctx.newshift = PETSC_TRUE; 587 sctx.nshift++; 588 } 589 } while (sctx.newshift); 590 591 ierr = PetscFree(rtmp);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 593 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 594 595 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 596 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 597 if (b->inode.size) { 598 C->ops->solve = MatSolve_SeqAIJ_Inode; 599 } else if (row_identity && col_identity) { 600 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 601 } else { 602 C->ops->solve = MatSolve_SeqAIJ; 603 } 604 C->ops->solveadd = MatSolveAdd_SeqAIJ; 605 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 606 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 607 C->ops->matsolve = MatMatSolve_SeqAIJ; 608 C->assembled = PETSC_TRUE; 609 C->preallocated = PETSC_TRUE; 610 611 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 612 613 /* MatShiftView(A,info,&sctx) */ 614 if (sctx.nshift) { 615 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 616 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 617 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 618 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 619 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 620 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 621 } 622 } 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 628 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 629 { 630 Mat C =B; 631 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 632 IS isrow = b->row,isicol = b->icol; 633 PetscErrorCode ierr; 634 const PetscInt *r,*ic,*ics; 635 PetscInt nz,row,i,j,n=A->rmap->n,diag; 636 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 637 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 638 MatScalar *pv,*rtmp,*pc,multiplier,d; 639 const MatScalar *v,*aa=a->a; 640 PetscReal rs=0.0; 641 FactorShiftCtx sctx; 642 const PetscInt *ddiag; 643 PetscBool row_identity, col_identity; 644 645 PetscFunctionBegin; 646 /* MatPivotSetUp(): initialize shift context sctx */ 647 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 648 649 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 650 ddiag = a->diag; 651 sctx.shift_top = info->zeropivot; 652 for (i=0; i<n; i++) { 653 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 654 d = (aa)[ddiag[i]]; 655 rs = -PetscAbsScalar(d) - PetscRealPart(d); 656 v = aa+ai[i]; 657 nz = ai[i+1] - ai[i]; 658 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 659 if (rs>sctx.shift_top) sctx.shift_top = rs; 660 } 661 sctx.shift_top *= 1.1; 662 sctx.nshift_max = 5; 663 sctx.shift_lo = 0.; 664 sctx.shift_hi = 1.; 665 } 666 667 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 668 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 669 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 670 ics = ic; 671 672 do { 673 sctx.newshift = PETSC_FALSE; 674 for (i=0; i<n; i++) { 675 nz = bi[i+1] - bi[i]; 676 bjtmp = bj + bi[i]; 677 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 678 679 /* load in initial (unfactored row) */ 680 nz = ai[r[i]+1] - ai[r[i]]; 681 ajtmp = aj + ai[r[i]]; 682 v = aa + ai[r[i]]; 683 for (j=0; j<nz; j++) { 684 rtmp[ics[ajtmp[j]]] = v[j]; 685 } 686 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 687 688 row = *bjtmp++; 689 while (row < i) { 690 pc = rtmp + row; 691 if (*pc != 0.0) { 692 pv = b->a + diag_offset[row]; 693 pj = b->j + diag_offset[row] + 1; 694 multiplier = *pc / *pv++; 695 *pc = multiplier; 696 nz = bi[row+1] - diag_offset[row] - 1; 697 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 698 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 699 } 700 row = *bjtmp++; 701 } 702 /* finished row so stick it into b->a */ 703 pv = b->a + bi[i]; 704 pj = b->j + bi[i]; 705 nz = bi[i+1] - bi[i]; 706 diag = diag_offset[i] - bi[i]; 707 rs = 0.0; 708 for (j=0; j<nz; j++) { 709 pv[j] = rtmp[pj[j]]; 710 rs += PetscAbsScalar(pv[j]); 711 } 712 rs -= PetscAbsScalar(pv[diag]); 713 714 sctx.rs = rs; 715 sctx.pv = pv[diag]; 716 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 717 if (sctx.newshift) break; 718 pv[diag] = sctx.pv; 719 } 720 721 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 722 /* 723 * if no shift in this attempt & shifting & started shifting & can refine, 724 * then try lower shift 725 */ 726 sctx.shift_hi = sctx.shift_fraction; 727 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 728 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 729 sctx.newshift = PETSC_TRUE; 730 sctx.nshift++; 731 } 732 } while (sctx.newshift); 733 734 /* invert diagonal entries for simplier triangular solves */ 735 for (i=0; i<n; i++) { 736 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 737 } 738 ierr = PetscFree(rtmp);CHKERRQ(ierr); 739 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 740 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 741 742 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 743 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 744 if (row_identity && col_identity) { 745 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 746 } else { 747 C->ops->solve = MatSolve_SeqAIJ_inplace; 748 } 749 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 750 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 751 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 752 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 753 754 C->assembled = PETSC_TRUE; 755 C->preallocated = PETSC_TRUE; 756 757 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 758 if (sctx.nshift) { 759 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 760 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 761 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 762 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 763 } 764 } 765 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 766 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 767 768 ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 /* 773 This routine implements inplace ILU(0) with row or/and column permutations. 774 Input: 775 A - original matrix 776 Output; 777 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 778 a->j (col index) is permuted by the inverse of colperm, then sorted 779 a->a reordered accordingly with a->j 780 a->diag (ptr to diagonal elements) is updated. 781 */ 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 784 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 785 { 786 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 787 IS isrow = a->row,isicol = a->icol; 788 PetscErrorCode ierr; 789 const PetscInt *r,*ic,*ics; 790 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 791 PetscInt *ajtmp,nz,row; 792 PetscInt *diag = a->diag,nbdiag,*pj; 793 PetscScalar *rtmp,*pc,multiplier,d; 794 MatScalar *pv,*v; 795 PetscReal rs; 796 FactorShiftCtx sctx; 797 const MatScalar *aa=a->a,*vtmp; 798 799 PetscFunctionBegin; 800 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 801 802 /* MatPivotSetUp(): initialize shift context sctx */ 803 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 804 805 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 806 const PetscInt *ddiag = a->diag; 807 sctx.shift_top = info->zeropivot; 808 for (i=0; i<n; i++) { 809 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 810 d = (aa)[ddiag[i]]; 811 rs = -PetscAbsScalar(d) - PetscRealPart(d); 812 vtmp = aa+ai[i]; 813 nz = ai[i+1] - ai[i]; 814 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 815 if (rs>sctx.shift_top) sctx.shift_top = rs; 816 } 817 sctx.shift_top *= 1.1; 818 sctx.nshift_max = 5; 819 sctx.shift_lo = 0.; 820 sctx.shift_hi = 1.; 821 } 822 823 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 824 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 825 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 826 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 827 ics = ic; 828 829 #if defined(MV) 830 sctx.shift_top = 0.; 831 sctx.nshift_max = 0; 832 sctx.shift_lo = 0.; 833 sctx.shift_hi = 0.; 834 sctx.shift_fraction = 0.; 835 836 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 837 sctx.shift_top = 0.; 838 for (i=0; i<n; i++) { 839 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 840 d = (a->a)[diag[i]]; 841 rs = -PetscAbsScalar(d) - PetscRealPart(d); 842 v = a->a+ai[i]; 843 nz = ai[i+1] - ai[i]; 844 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 845 if (rs>sctx.shift_top) sctx.shift_top = rs; 846 } 847 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 848 sctx.shift_top *= 1.1; 849 sctx.nshift_max = 5; 850 sctx.shift_lo = 0.; 851 sctx.shift_hi = 1.; 852 } 853 854 sctx.shift_amount = 0.; 855 sctx.nshift = 0; 856 #endif 857 858 do { 859 sctx.newshift = PETSC_FALSE; 860 for (i=0; i<n; i++) { 861 /* load in initial unfactored row */ 862 nz = ai[r[i]+1] - ai[r[i]]; 863 ajtmp = aj + ai[r[i]]; 864 v = a->a + ai[r[i]]; 865 /* sort permuted ajtmp and values v accordingly */ 866 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 867 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 868 869 diag[r[i]] = ai[r[i]]; 870 for (j=0; j<nz; j++) { 871 rtmp[ajtmp[j]] = v[j]; 872 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 873 } 874 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 875 876 row = *ajtmp++; 877 while (row < i) { 878 pc = rtmp + row; 879 if (*pc != 0.0) { 880 pv = a->a + diag[r[row]]; 881 pj = aj + diag[r[row]] + 1; 882 883 multiplier = *pc / *pv++; 884 *pc = multiplier; 885 nz = ai[r[row]+1] - diag[r[row]] - 1; 886 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 887 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 888 } 889 row = *ajtmp++; 890 } 891 /* finished row so overwrite it onto a->a */ 892 pv = a->a + ai[r[i]]; 893 pj = aj + ai[r[i]]; 894 nz = ai[r[i]+1] - ai[r[i]]; 895 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 896 897 rs = 0.0; 898 for (j=0; j<nz; j++) { 899 pv[j] = rtmp[pj[j]]; 900 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 901 } 902 903 sctx.rs = rs; 904 sctx.pv = pv[nbdiag]; 905 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 906 if (sctx.newshift) break; 907 pv[nbdiag] = sctx.pv; 908 } 909 910 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 911 /* 912 * if no shift in this attempt & shifting & started shifting & can refine, 913 * then try lower shift 914 */ 915 sctx.shift_hi = sctx.shift_fraction; 916 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 917 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 918 sctx.newshift = PETSC_TRUE; 919 sctx.nshift++; 920 } 921 } while (sctx.newshift); 922 923 /* invert diagonal entries for simplier triangular solves */ 924 for (i=0; i<n; i++) { 925 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 926 } 927 928 ierr = PetscFree(rtmp);CHKERRQ(ierr); 929 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 930 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 931 932 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 933 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 934 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 935 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 936 937 A->assembled = PETSC_TRUE; 938 A->preallocated = PETSC_TRUE; 939 940 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 941 if (sctx.nshift) { 942 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 943 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 944 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 945 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 946 } 947 } 948 PetscFunctionReturn(0); 949 } 950 951 /* ----------------------------------------------------------- */ 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatLUFactor_SeqAIJ" 954 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 955 { 956 PetscErrorCode ierr; 957 Mat C; 958 959 PetscFunctionBegin; 960 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 961 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 962 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 963 964 A->ops->solve = C->ops->solve; 965 A->ops->solvetranspose = C->ops->solvetranspose; 966 967 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 968 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 /* ----------------------------------------------------------- */ 972 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 976 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 977 { 978 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 979 IS iscol = a->col,isrow = a->row; 980 PetscErrorCode ierr; 981 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 982 PetscInt nz; 983 const PetscInt *rout,*cout,*r,*c; 984 PetscScalar *x,*tmp,*tmps,sum; 985 const PetscScalar *b; 986 const MatScalar *aa = a->a,*v; 987 988 PetscFunctionBegin; 989 if (!n) PetscFunctionReturn(0); 990 991 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 992 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 993 tmp = a->solve_work; 994 995 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 996 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 997 998 /* forward solve the lower triangular */ 999 tmp[0] = b[*r++]; 1000 tmps = tmp; 1001 for (i=1; i<n; i++) { 1002 v = aa + ai[i]; 1003 vi = aj + ai[i]; 1004 nz = a->diag[i] - ai[i]; 1005 sum = b[*r++]; 1006 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1007 tmp[i] = sum; 1008 } 1009 1010 /* backward solve the upper triangular */ 1011 for (i=n-1; i>=0; i--) { 1012 v = aa + a->diag[i] + 1; 1013 vi = aj + a->diag[i] + 1; 1014 nz = ai[i+1] - a->diag[i] - 1; 1015 sum = tmp[i]; 1016 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1017 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1018 } 1019 1020 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1021 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1022 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1023 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1024 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1030 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1031 { 1032 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1033 IS iscol = a->col,isrow = a->row; 1034 PetscErrorCode ierr; 1035 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1036 PetscInt nz,neq; 1037 const PetscInt *rout,*cout,*r,*c; 1038 PetscScalar *x,*b,*tmp,*tmps,sum; 1039 const MatScalar *aa = a->a,*v; 1040 PetscBool bisdense,xisdense; 1041 1042 PetscFunctionBegin; 1043 if (!n) PetscFunctionReturn(0); 1044 1045 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1046 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1047 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1048 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1049 1050 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1051 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1052 1053 tmp = a->solve_work; 1054 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1055 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1056 1057 for (neq=0; neq<B->cmap->n; neq++) { 1058 /* forward solve the lower triangular */ 1059 tmp[0] = b[r[0]]; 1060 tmps = tmp; 1061 for (i=1; i<n; i++) { 1062 v = aa + ai[i]; 1063 vi = aj + ai[i]; 1064 nz = a->diag[i] - ai[i]; 1065 sum = b[r[i]]; 1066 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1067 tmp[i] = sum; 1068 } 1069 /* backward solve the upper triangular */ 1070 for (i=n-1; i>=0; i--) { 1071 v = aa + a->diag[i] + 1; 1072 vi = aj + a->diag[i] + 1; 1073 nz = ai[i+1] - a->diag[i] - 1; 1074 sum = tmp[i]; 1075 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1076 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1077 } 1078 1079 b += n; 1080 x += n; 1081 } 1082 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1083 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1084 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1085 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1086 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1092 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1093 { 1094 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1095 IS iscol = a->col,isrow = a->row; 1096 PetscErrorCode ierr; 1097 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1098 PetscInt nz,neq; 1099 const PetscInt *rout,*cout,*r,*c; 1100 PetscScalar *x,*b,*tmp,sum; 1101 const MatScalar *aa = a->a,*v; 1102 PetscBool bisdense,xisdense; 1103 1104 PetscFunctionBegin; 1105 if (!n) PetscFunctionReturn(0); 1106 1107 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1108 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1109 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1110 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1111 1112 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1113 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1114 1115 tmp = a->solve_work; 1116 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1117 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1118 1119 for (neq=0; neq<B->cmap->n; neq++) { 1120 /* forward solve the lower triangular */ 1121 tmp[0] = b[r[0]]; 1122 v = aa; 1123 vi = aj; 1124 for (i=1; i<n; i++) { 1125 nz = ai[i+1] - ai[i]; 1126 sum = b[r[i]]; 1127 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1128 tmp[i] = sum; 1129 v += nz; vi += nz; 1130 } 1131 1132 /* backward solve the upper triangular */ 1133 for (i=n-1; i>=0; i--) { 1134 v = aa + adiag[i+1]+1; 1135 vi = aj + adiag[i+1]+1; 1136 nz = adiag[i]-adiag[i+1]-1; 1137 sum = tmp[i]; 1138 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1139 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1140 } 1141 1142 b += n; 1143 x += n; 1144 } 1145 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1146 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1147 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1148 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1149 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1155 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1156 { 1157 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1158 IS iscol = a->col,isrow = a->row; 1159 PetscErrorCode ierr; 1160 const PetscInt *r,*c,*rout,*cout; 1161 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1162 PetscInt nz,row; 1163 PetscScalar *x,*b,*tmp,*tmps,sum; 1164 const MatScalar *aa = a->a,*v; 1165 1166 PetscFunctionBegin; 1167 if (!n) PetscFunctionReturn(0); 1168 1169 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1170 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1171 tmp = a->solve_work; 1172 1173 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1174 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1175 1176 /* forward solve the lower triangular */ 1177 tmp[0] = b[*r++]; 1178 tmps = tmp; 1179 for (row=1; row<n; row++) { 1180 i = rout[row]; /* permuted row */ 1181 v = aa + ai[i]; 1182 vi = aj + ai[i]; 1183 nz = a->diag[i] - ai[i]; 1184 sum = b[*r++]; 1185 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1186 tmp[row] = sum; 1187 } 1188 1189 /* backward solve the upper triangular */ 1190 for (row=n-1; row>=0; row--) { 1191 i = rout[row]; /* permuted row */ 1192 v = aa + a->diag[i] + 1; 1193 vi = aj + a->diag[i] + 1; 1194 nz = ai[i+1] - a->diag[i] - 1; 1195 sum = tmp[row]; 1196 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1197 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1198 } 1199 1200 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1201 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1202 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1204 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 /* ----------------------------------------------------------- */ 1209 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1212 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1213 { 1214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 PetscInt n = A->rmap->n; 1217 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1218 PetscScalar *x; 1219 const PetscScalar *b; 1220 const MatScalar *aa = a->a; 1221 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1222 PetscInt adiag_i,i,nz,ai_i; 1223 const PetscInt *vi; 1224 const MatScalar *v; 1225 PetscScalar sum; 1226 #endif 1227 1228 PetscFunctionBegin; 1229 if (!n) PetscFunctionReturn(0); 1230 1231 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1232 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1233 1234 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1235 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1236 #else 1237 /* forward solve the lower triangular */ 1238 x[0] = b[0]; 1239 for (i=1; i<n; i++) { 1240 ai_i = ai[i]; 1241 v = aa + ai_i; 1242 vi = aj + ai_i; 1243 nz = adiag[i] - ai_i; 1244 sum = b[i]; 1245 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1246 x[i] = sum; 1247 } 1248 1249 /* backward solve the upper triangular */ 1250 for (i=n-1; i>=0; i--) { 1251 adiag_i = adiag[i]; 1252 v = aa + adiag_i + 1; 1253 vi = aj + adiag_i + 1; 1254 nz = ai[i+1] - adiag_i - 1; 1255 sum = x[i]; 1256 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1257 x[i] = sum*aa[adiag_i]; 1258 } 1259 #endif 1260 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1261 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1262 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1268 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1269 { 1270 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1271 IS iscol = a->col,isrow = a->row; 1272 PetscErrorCode ierr; 1273 PetscInt i, n = A->rmap->n,j; 1274 PetscInt nz; 1275 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1276 PetscScalar *x,*tmp,sum; 1277 const PetscScalar *b; 1278 const MatScalar *aa = a->a,*v; 1279 1280 PetscFunctionBegin; 1281 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1282 1283 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1284 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1285 tmp = a->solve_work; 1286 1287 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1288 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1289 1290 /* forward solve the lower triangular */ 1291 tmp[0] = b[*r++]; 1292 for (i=1; i<n; i++) { 1293 v = aa + ai[i]; 1294 vi = aj + ai[i]; 1295 nz = a->diag[i] - ai[i]; 1296 sum = b[*r++]; 1297 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1298 tmp[i] = sum; 1299 } 1300 1301 /* backward solve the upper triangular */ 1302 for (i=n-1; i>=0; i--) { 1303 v = aa + a->diag[i] + 1; 1304 vi = aj + a->diag[i] + 1; 1305 nz = ai[i+1] - a->diag[i] - 1; 1306 sum = tmp[i]; 1307 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1308 tmp[i] = sum*aa[a->diag[i]]; 1309 x[*c--] += tmp[i]; 1310 } 1311 1312 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1313 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1314 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1315 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1316 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1322 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1323 { 1324 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1325 IS iscol = a->col,isrow = a->row; 1326 PetscErrorCode ierr; 1327 PetscInt i, n = A->rmap->n,j; 1328 PetscInt nz; 1329 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1330 PetscScalar *x,*tmp,sum; 1331 const PetscScalar *b; 1332 const MatScalar *aa = a->a,*v; 1333 1334 PetscFunctionBegin; 1335 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1336 1337 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1338 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1339 tmp = a->solve_work; 1340 1341 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1342 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1343 1344 /* forward solve the lower triangular */ 1345 tmp[0] = b[r[0]]; 1346 v = aa; 1347 vi = aj; 1348 for (i=1; i<n; i++) { 1349 nz = ai[i+1] - ai[i]; 1350 sum = b[r[i]]; 1351 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1352 tmp[i] = sum; 1353 v += nz; 1354 vi += nz; 1355 } 1356 1357 /* backward solve the upper triangular */ 1358 v = aa + adiag[n-1]; 1359 vi = aj + adiag[n-1]; 1360 for (i=n-1; i>=0; i--) { 1361 nz = adiag[i] - adiag[i+1] - 1; 1362 sum = tmp[i]; 1363 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1364 tmp[i] = sum*v[nz]; 1365 x[c[i]] += tmp[i]; 1366 v += nz+1; vi += nz+1; 1367 } 1368 1369 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1370 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1371 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1372 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1373 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1379 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1380 { 1381 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1382 IS iscol = a->col,isrow = a->row; 1383 PetscErrorCode ierr; 1384 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1385 PetscInt i,n = A->rmap->n,j; 1386 PetscInt nz; 1387 PetscScalar *x,*tmp,s1; 1388 const MatScalar *aa = a->a,*v; 1389 const PetscScalar *b; 1390 1391 PetscFunctionBegin; 1392 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1393 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1394 tmp = a->solve_work; 1395 1396 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1397 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1398 1399 /* copy the b into temp work space according to permutation */ 1400 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1401 1402 /* forward solve the U^T */ 1403 for (i=0; i<n; i++) { 1404 v = aa + diag[i]; 1405 vi = aj + diag[i] + 1; 1406 nz = ai[i+1] - diag[i] - 1; 1407 s1 = tmp[i]; 1408 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1409 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1410 tmp[i] = s1; 1411 } 1412 1413 /* backward solve the L^T */ 1414 for (i=n-1; i>=0; i--) { 1415 v = aa + diag[i] - 1; 1416 vi = aj + diag[i] - 1; 1417 nz = diag[i] - ai[i]; 1418 s1 = tmp[i]; 1419 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1420 } 1421 1422 /* copy tmp into x according to permutation */ 1423 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1424 1425 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1426 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1427 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1429 1430 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1436 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1437 { 1438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1439 IS iscol = a->col,isrow = a->row; 1440 PetscErrorCode ierr; 1441 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1442 PetscInt i,n = A->rmap->n,j; 1443 PetscInt nz; 1444 PetscScalar *x,*tmp,s1; 1445 const MatScalar *aa = a->a,*v; 1446 const PetscScalar *b; 1447 1448 PetscFunctionBegin; 1449 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1450 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1451 tmp = a->solve_work; 1452 1453 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1454 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1455 1456 /* copy the b into temp work space according to permutation */ 1457 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1458 1459 /* forward solve the U^T */ 1460 for (i=0; i<n; i++) { 1461 v = aa + adiag[i+1] + 1; 1462 vi = aj + adiag[i+1] + 1; 1463 nz = adiag[i] - adiag[i+1] - 1; 1464 s1 = tmp[i]; 1465 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1466 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1467 tmp[i] = s1; 1468 } 1469 1470 /* backward solve the L^T */ 1471 for (i=n-1; i>=0; i--) { 1472 v = aa + ai[i]; 1473 vi = aj + ai[i]; 1474 nz = ai[i+1] - ai[i]; 1475 s1 = tmp[i]; 1476 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1477 } 1478 1479 /* copy tmp into x according to permutation */ 1480 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1481 1482 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1483 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1484 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1485 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1486 1487 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1488 PetscFunctionReturn(0); 1489 } 1490 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1493 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1494 { 1495 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1496 IS iscol = a->col,isrow = a->row; 1497 PetscErrorCode ierr; 1498 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1499 PetscInt i,n = A->rmap->n,j; 1500 PetscInt nz; 1501 PetscScalar *x,*tmp,s1; 1502 const MatScalar *aa = a->a,*v; 1503 const PetscScalar *b; 1504 1505 PetscFunctionBegin; 1506 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1507 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1508 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1509 tmp = a->solve_work; 1510 1511 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1512 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1513 1514 /* copy the b into temp work space according to permutation */ 1515 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1516 1517 /* forward solve the U^T */ 1518 for (i=0; i<n; i++) { 1519 v = aa + diag[i]; 1520 vi = aj + diag[i] + 1; 1521 nz = ai[i+1] - diag[i] - 1; 1522 s1 = tmp[i]; 1523 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1524 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1525 tmp[i] = s1; 1526 } 1527 1528 /* backward solve the L^T */ 1529 for (i=n-1; i>=0; i--) { 1530 v = aa + diag[i] - 1; 1531 vi = aj + diag[i] - 1; 1532 nz = diag[i] - ai[i]; 1533 s1 = tmp[i]; 1534 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1535 } 1536 1537 /* copy tmp into x according to permutation */ 1538 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1539 1540 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1541 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1542 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1543 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1544 1545 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1551 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1552 { 1553 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1554 IS iscol = a->col,isrow = a->row; 1555 PetscErrorCode ierr; 1556 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1557 PetscInt i,n = A->rmap->n,j; 1558 PetscInt nz; 1559 PetscScalar *x,*tmp,s1; 1560 const MatScalar *aa = a->a,*v; 1561 const PetscScalar *b; 1562 1563 PetscFunctionBegin; 1564 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1565 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1566 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1567 tmp = a->solve_work; 1568 1569 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1570 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1571 1572 /* copy the b into temp work space according to permutation */ 1573 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1574 1575 /* forward solve the U^T */ 1576 for (i=0; i<n; i++) { 1577 v = aa + adiag[i+1] + 1; 1578 vi = aj + adiag[i+1] + 1; 1579 nz = adiag[i] - adiag[i+1] - 1; 1580 s1 = tmp[i]; 1581 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1582 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1583 tmp[i] = s1; 1584 } 1585 1586 1587 /* backward solve the L^T */ 1588 for (i=n-1; i>=0; i--) { 1589 v = aa + ai[i]; 1590 vi = aj + ai[i]; 1591 nz = ai[i+1] - ai[i]; 1592 s1 = tmp[i]; 1593 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1594 } 1595 1596 /* copy tmp into x according to permutation */ 1597 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1598 1599 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1600 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1601 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1602 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1603 1604 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 1608 /* ----------------------------------------------------------------*/ 1609 1610 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1611 1612 /* 1613 ilu() under revised new data structure. 1614 Factored arrays bj and ba are stored as 1615 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1616 1617 bi=fact->i is an array of size n+1, in which 1618 bi+ 1619 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1620 bi[n]: points to L(n-1,n-1)+1 1621 1622 bdiag=fact->diag is an array of size n+1,in which 1623 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1624 bdiag[n]: points to entry of U(n-1,0)-1 1625 1626 U(i,:) contains bdiag[i] as its last entry, i.e., 1627 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1628 */ 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1631 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1632 { 1633 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1634 PetscErrorCode ierr; 1635 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1636 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1637 IS isicol; 1638 1639 PetscFunctionBegin; 1640 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1641 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1642 b = (Mat_SeqAIJ*)(fact)->data; 1643 1644 /* allocate matrix arrays for new data structure */ 1645 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1646 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1647 1648 b->singlemalloc = PETSC_TRUE; 1649 if (!b->diag) { 1650 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 1651 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1652 } 1653 bdiag = b->diag; 1654 1655 if (n > 0) { 1656 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1657 } 1658 1659 /* set bi and bj with new data structure */ 1660 bi = b->i; 1661 bj = b->j; 1662 1663 /* L part */ 1664 bi[0] = 0; 1665 for (i=0; i<n; i++) { 1666 nz = adiag[i] - ai[i]; 1667 bi[i+1] = bi[i] + nz; 1668 aj = a->j + ai[i]; 1669 for (j=0; j<nz; j++) { 1670 /* *bj = aj[j]; bj++; */ 1671 bj[k++] = aj[j]; 1672 } 1673 } 1674 1675 /* U part */ 1676 bdiag[n] = bi[n]-1; 1677 for (i=n-1; i>=0; i--) { 1678 nz = ai[i+1] - adiag[i] - 1; 1679 aj = a->j + adiag[i] + 1; 1680 for (j=0; j<nz; j++) { 1681 /* *bj = aj[j]; bj++; */ 1682 bj[k++] = aj[j]; 1683 } 1684 /* diag[i] */ 1685 /* *bj = i; bj++; */ 1686 bj[k++] = i; 1687 bdiag[i] = bdiag[i+1] + nz + 1; 1688 } 1689 1690 fact->factortype = MAT_FACTOR_ILU; 1691 fact->info.factor_mallocs = 0; 1692 fact->info.fill_ratio_given = info->fill; 1693 fact->info.fill_ratio_needed = 1.0; 1694 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1695 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1696 1697 b = (Mat_SeqAIJ*)(fact)->data; 1698 b->row = isrow; 1699 b->col = iscol; 1700 b->icol = isicol; 1701 ierr = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1702 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1703 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1709 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1710 { 1711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1712 IS isicol; 1713 PetscErrorCode ierr; 1714 const PetscInt *r,*ic; 1715 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1716 PetscInt *bi,*cols,nnz,*cols_lvl; 1717 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1718 PetscInt i,levels,diagonal_fill; 1719 PetscBool col_identity,row_identity,missing; 1720 PetscReal f; 1721 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1722 PetscBT lnkbt; 1723 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1724 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1725 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1726 1727 PetscFunctionBegin; 1728 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1729 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1730 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1731 1732 levels = (PetscInt)info->levels; 1733 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1734 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1735 if (!levels && row_identity && col_identity) { 1736 /* special case: ilu(0) with natural ordering */ 1737 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1738 if (a->inode.size) { 1739 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1745 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1746 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1747 1748 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1749 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1750 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1751 bi[0] = bdiag[0] = 0; 1752 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1753 1754 /* create a linked list for storing column indices of the active row */ 1755 nlnk = n + 1; 1756 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1757 1758 /* initial FreeSpace size is f*(ai[n]+1) */ 1759 f = info->fill; 1760 diagonal_fill = (PetscInt)info->diagonal_fill; 1761 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1762 current_space = free_space; 1763 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1764 current_space_lvl = free_space_lvl; 1765 for (i=0; i<n; i++) { 1766 nzi = 0; 1767 /* copy current row into linked list */ 1768 nnz = ai[r[i]+1] - ai[r[i]]; 1769 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1770 cols = aj + ai[r[i]]; 1771 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1772 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1773 nzi += nlnk; 1774 1775 /* make sure diagonal entry is included */ 1776 if (diagonal_fill && lnk[i] == -1) { 1777 fm = n; 1778 while (lnk[fm] < i) fm = lnk[fm]; 1779 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1780 lnk[fm] = i; 1781 lnk_lvl[i] = 0; 1782 nzi++; dcount++; 1783 } 1784 1785 /* add pivot rows into the active row */ 1786 nzbd = 0; 1787 prow = lnk[n]; 1788 while (prow < i) { 1789 nnz = bdiag[prow]; 1790 cols = bj_ptr[prow] + nnz + 1; 1791 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1792 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1793 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1794 nzi += nlnk; 1795 prow = lnk[prow]; 1796 nzbd++; 1797 } 1798 bdiag[i] = nzbd; 1799 bi[i+1] = bi[i] + nzi; 1800 /* if free space is not available, make more free space */ 1801 if (current_space->local_remaining<nzi) { 1802 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1803 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1804 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1805 reallocs++; 1806 } 1807 1808 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1809 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1810 bj_ptr[i] = current_space->array; 1811 bjlvl_ptr[i] = current_space_lvl->array; 1812 1813 /* make sure the active row i has diagonal entry */ 1814 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1815 1816 current_space->array += nzi; 1817 current_space->local_used += nzi; 1818 current_space->local_remaining -= nzi; 1819 current_space_lvl->array += nzi; 1820 current_space_lvl->local_used += nzi; 1821 current_space_lvl->local_remaining -= nzi; 1822 } 1823 1824 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1825 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1826 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1827 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1828 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1829 1830 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1831 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1832 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1833 1834 #if defined(PETSC_USE_INFO) 1835 { 1836 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1837 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1838 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1839 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1840 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1841 if (diagonal_fill) { 1842 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1843 } 1844 } 1845 #endif 1846 /* put together the new matrix */ 1847 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1848 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1849 b = (Mat_SeqAIJ*)(fact)->data; 1850 1851 b->free_a = PETSC_TRUE; 1852 b->free_ij = PETSC_TRUE; 1853 b->singlemalloc = PETSC_FALSE; 1854 1855 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 1856 1857 b->j = bj; 1858 b->i = bi; 1859 b->diag = bdiag; 1860 b->ilen = 0; 1861 b->imax = 0; 1862 b->row = isrow; 1863 b->col = iscol; 1864 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1865 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1866 b->icol = isicol; 1867 1868 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 1869 /* In b structure: Free imax, ilen, old a, old j. 1870 Allocate bdiag, solve_work, new a, new j */ 1871 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1872 b->maxnz = b->nz = bdiag[0]+1; 1873 1874 (fact)->info.factor_mallocs = reallocs; 1875 (fact)->info.fill_ratio_given = f; 1876 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1877 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1878 if (a->inode.size) { 1879 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1880 } 1881 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1887 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1888 { 1889 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1890 IS isicol; 1891 PetscErrorCode ierr; 1892 const PetscInt *r,*ic; 1893 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1894 PetscInt *bi,*cols,nnz,*cols_lvl; 1895 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1896 PetscInt i,levels,diagonal_fill; 1897 PetscBool col_identity,row_identity; 1898 PetscReal f; 1899 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1900 PetscBT lnkbt; 1901 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1902 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1903 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1904 PetscBool missing; 1905 1906 PetscFunctionBegin; 1907 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1908 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1909 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1910 1911 f = info->fill; 1912 levels = (PetscInt)info->levels; 1913 diagonal_fill = (PetscInt)info->diagonal_fill; 1914 1915 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1916 1917 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1918 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1919 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1920 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1921 1922 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1923 if (a->inode.size) { 1924 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1925 } 1926 fact->factortype = MAT_FACTOR_ILU; 1927 (fact)->info.factor_mallocs = 0; 1928 (fact)->info.fill_ratio_given = info->fill; 1929 (fact)->info.fill_ratio_needed = 1.0; 1930 1931 b = (Mat_SeqAIJ*)(fact)->data; 1932 b->row = isrow; 1933 b->col = iscol; 1934 b->icol = isicol; 1935 ierr = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1936 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1937 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1942 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1943 1944 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1945 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1946 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1947 bi[0] = bdiag[0] = 0; 1948 1949 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1950 1951 /* create a linked list for storing column indices of the active row */ 1952 nlnk = n + 1; 1953 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1954 1955 /* initial FreeSpace size is f*(ai[n]+1) */ 1956 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1957 current_space = free_space; 1958 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1959 current_space_lvl = free_space_lvl; 1960 1961 for (i=0; i<n; i++) { 1962 nzi = 0; 1963 /* copy current row into linked list */ 1964 nnz = ai[r[i]+1] - ai[r[i]]; 1965 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1966 cols = aj + ai[r[i]]; 1967 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1968 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1969 nzi += nlnk; 1970 1971 /* make sure diagonal entry is included */ 1972 if (diagonal_fill && lnk[i] == -1) { 1973 fm = n; 1974 while (lnk[fm] < i) fm = lnk[fm]; 1975 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1976 lnk[fm] = i; 1977 lnk_lvl[i] = 0; 1978 nzi++; dcount++; 1979 } 1980 1981 /* add pivot rows into the active row */ 1982 nzbd = 0; 1983 prow = lnk[n]; 1984 while (prow < i) { 1985 nnz = bdiag[prow]; 1986 cols = bj_ptr[prow] + nnz + 1; 1987 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1988 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1989 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1990 nzi += nlnk; 1991 prow = lnk[prow]; 1992 nzbd++; 1993 } 1994 bdiag[i] = nzbd; 1995 bi[i+1] = bi[i] + nzi; 1996 1997 /* if free space is not available, make more free space */ 1998 if (current_space->local_remaining<nzi) { 1999 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 2000 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2001 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2002 reallocs++; 2003 } 2004 2005 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2006 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2007 bj_ptr[i] = current_space->array; 2008 bjlvl_ptr[i] = current_space_lvl->array; 2009 2010 /* make sure the active row i has diagonal entry */ 2011 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2012 2013 current_space->array += nzi; 2014 current_space->local_used += nzi; 2015 current_space->local_remaining -= nzi; 2016 current_space_lvl->array += nzi; 2017 current_space_lvl->local_used += nzi; 2018 current_space_lvl->local_remaining -= nzi; 2019 } 2020 2021 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2022 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2023 2024 /* destroy list of free space and other temporary arrays */ 2025 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 2026 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2027 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2028 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2029 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2030 2031 #if defined(PETSC_USE_INFO) 2032 { 2033 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2034 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2035 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2036 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2037 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2038 if (diagonal_fill) { 2039 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 2040 } 2041 } 2042 #endif 2043 2044 /* put together the new matrix */ 2045 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2046 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2047 b = (Mat_SeqAIJ*)(fact)->data; 2048 2049 b->free_a = PETSC_TRUE; 2050 b->free_ij = PETSC_TRUE; 2051 b->singlemalloc = PETSC_FALSE; 2052 2053 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2054 b->j = bj; 2055 b->i = bi; 2056 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2057 b->diag = bdiag; 2058 b->ilen = 0; 2059 b->imax = 0; 2060 b->row = isrow; 2061 b->col = iscol; 2062 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2063 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2064 b->icol = isicol; 2065 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 2066 /* In b structure: Free imax, ilen, old a, old j. 2067 Allocate bdiag, solve_work, new a, new j */ 2068 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2069 b->maxnz = b->nz = bi[n]; 2070 2071 (fact)->info.factor_mallocs = reallocs; 2072 (fact)->info.fill_ratio_given = f; 2073 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2074 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2075 if (a->inode.size) { 2076 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2077 } 2078 PetscFunctionReturn(0); 2079 } 2080 2081 #undef __FUNCT__ 2082 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2083 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2084 { 2085 Mat C = B; 2086 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2087 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2088 IS ip=b->row,iip = b->icol; 2089 PetscErrorCode ierr; 2090 const PetscInt *rip,*riip; 2091 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2092 PetscInt *ai=a->i,*aj=a->j; 2093 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2094 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2095 PetscBool perm_identity; 2096 FactorShiftCtx sctx; 2097 PetscReal rs; 2098 MatScalar d,*v; 2099 2100 PetscFunctionBegin; 2101 /* MatPivotSetUp(): initialize shift context sctx */ 2102 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2103 2104 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2105 sctx.shift_top = info->zeropivot; 2106 for (i=0; i<mbs; i++) { 2107 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2108 d = (aa)[a->diag[i]]; 2109 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2110 v = aa+ai[i]; 2111 nz = ai[i+1] - ai[i]; 2112 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2113 if (rs>sctx.shift_top) sctx.shift_top = rs; 2114 } 2115 sctx.shift_top *= 1.1; 2116 sctx.nshift_max = 5; 2117 sctx.shift_lo = 0.; 2118 sctx.shift_hi = 1.; 2119 } 2120 2121 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2122 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2123 2124 /* allocate working arrays 2125 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2126 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2127 */ 2128 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2129 2130 do { 2131 sctx.newshift = PETSC_FALSE; 2132 2133 for (i=0; i<mbs; i++) c2r[i] = mbs; 2134 if (mbs) il[0] = 0; 2135 2136 for (k = 0; k<mbs; k++) { 2137 /* zero rtmp */ 2138 nz = bi[k+1] - bi[k]; 2139 bjtmp = bj + bi[k]; 2140 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2141 2142 /* load in initial unfactored row */ 2143 bval = ba + bi[k]; 2144 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2145 for (j = jmin; j < jmax; j++) { 2146 col = riip[aj[j]]; 2147 if (col >= k) { /* only take upper triangular entry */ 2148 rtmp[col] = aa[j]; 2149 *bval++ = 0.0; /* for in-place factorization */ 2150 } 2151 } 2152 /* shift the diagonal of the matrix: ZeropivotApply() */ 2153 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2154 2155 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2156 dk = rtmp[k]; 2157 i = c2r[k]; /* first row to be added to k_th row */ 2158 2159 while (i < k) { 2160 nexti = c2r[i]; /* next row to be added to k_th row */ 2161 2162 /* compute multiplier, update diag(k) and U(i,k) */ 2163 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2164 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2165 dk += uikdi*ba[ili]; /* update diag[k] */ 2166 ba[ili] = uikdi; /* -U(i,k) */ 2167 2168 /* add multiple of row i to k-th row */ 2169 jmin = ili + 1; jmax = bi[i+1]; 2170 if (jmin < jmax) { 2171 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2172 /* update il and c2r for row i */ 2173 il[i] = jmin; 2174 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2175 } 2176 i = nexti; 2177 } 2178 2179 /* copy data into U(k,:) */ 2180 rs = 0.0; 2181 jmin = bi[k]; jmax = bi[k+1]-1; 2182 if (jmin < jmax) { 2183 for (j=jmin; j<jmax; j++) { 2184 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2185 } 2186 /* add the k-th row into il and c2r */ 2187 il[k] = jmin; 2188 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2189 } 2190 2191 /* MatPivotCheck() */ 2192 sctx.rs = rs; 2193 sctx.pv = dk; 2194 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 2195 if (sctx.newshift) break; 2196 dk = sctx.pv; 2197 2198 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2199 } 2200 } while (sctx.newshift); 2201 2202 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2203 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2204 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2205 2206 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2207 if (perm_identity) { 2208 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2209 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2210 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2211 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2212 } else { 2213 B->ops->solve = MatSolve_SeqSBAIJ_1; 2214 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2215 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2216 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2217 } 2218 2219 C->assembled = PETSC_TRUE; 2220 C->preallocated = PETSC_TRUE; 2221 2222 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2223 2224 /* MatPivotView() */ 2225 if (sctx.nshift) { 2226 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2227 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2228 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2229 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2230 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2231 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2232 } 2233 } 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2239 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2240 { 2241 Mat C = B; 2242 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2243 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2244 IS ip=b->row,iip = b->icol; 2245 PetscErrorCode ierr; 2246 const PetscInt *rip,*riip; 2247 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2248 PetscInt *ai=a->i,*aj=a->j; 2249 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2250 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2251 PetscBool perm_identity; 2252 FactorShiftCtx sctx; 2253 PetscReal rs; 2254 MatScalar d,*v; 2255 2256 PetscFunctionBegin; 2257 /* MatPivotSetUp(): initialize shift context sctx */ 2258 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2259 2260 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2261 sctx.shift_top = info->zeropivot; 2262 for (i=0; i<mbs; i++) { 2263 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2264 d = (aa)[a->diag[i]]; 2265 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2266 v = aa+ai[i]; 2267 nz = ai[i+1] - ai[i]; 2268 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2269 if (rs>sctx.shift_top) sctx.shift_top = rs; 2270 } 2271 sctx.shift_top *= 1.1; 2272 sctx.nshift_max = 5; 2273 sctx.shift_lo = 0.; 2274 sctx.shift_hi = 1.; 2275 } 2276 2277 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2278 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2279 2280 /* initialization */ 2281 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2282 2283 do { 2284 sctx.newshift = PETSC_FALSE; 2285 2286 for (i=0; i<mbs; i++) jl[i] = mbs; 2287 il[0] = 0; 2288 2289 for (k = 0; k<mbs; k++) { 2290 /* zero rtmp */ 2291 nz = bi[k+1] - bi[k]; 2292 bjtmp = bj + bi[k]; 2293 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2294 2295 bval = ba + bi[k]; 2296 /* initialize k-th row by the perm[k]-th row of A */ 2297 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2298 for (j = jmin; j < jmax; j++) { 2299 col = riip[aj[j]]; 2300 if (col >= k) { /* only take upper triangular entry */ 2301 rtmp[col] = aa[j]; 2302 *bval++ = 0.0; /* for in-place factorization */ 2303 } 2304 } 2305 /* shift the diagonal of the matrix */ 2306 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2307 2308 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2309 dk = rtmp[k]; 2310 i = jl[k]; /* first row to be added to k_th row */ 2311 2312 while (i < k) { 2313 nexti = jl[i]; /* next row to be added to k_th row */ 2314 2315 /* compute multiplier, update diag(k) and U(i,k) */ 2316 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2317 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2318 dk += uikdi*ba[ili]; 2319 ba[ili] = uikdi; /* -U(i,k) */ 2320 2321 /* add multiple of row i to k-th row */ 2322 jmin = ili + 1; jmax = bi[i+1]; 2323 if (jmin < jmax) { 2324 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2325 /* update il and jl for row i */ 2326 il[i] = jmin; 2327 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2328 } 2329 i = nexti; 2330 } 2331 2332 /* shift the diagonals when zero pivot is detected */ 2333 /* compute rs=sum of abs(off-diagonal) */ 2334 rs = 0.0; 2335 jmin = bi[k]+1; 2336 nz = bi[k+1] - jmin; 2337 bcol = bj + jmin; 2338 for (j=0; j<nz; j++) { 2339 rs += PetscAbsScalar(rtmp[bcol[j]]); 2340 } 2341 2342 sctx.rs = rs; 2343 sctx.pv = dk; 2344 ierr = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr); 2345 if (sctx.newshift) break; 2346 dk = sctx.pv; 2347 2348 /* copy data into U(k,:) */ 2349 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2350 jmin = bi[k]+1; jmax = bi[k+1]; 2351 if (jmin < jmax) { 2352 for (j=jmin; j<jmax; j++) { 2353 col = bj[j]; ba[j] = rtmp[col]; 2354 } 2355 /* add the k-th row into il and jl */ 2356 il[k] = jmin; 2357 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2358 } 2359 } 2360 } while (sctx.newshift); 2361 2362 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2363 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2364 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2365 2366 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2367 if (perm_identity) { 2368 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2369 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2370 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2371 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2372 } else { 2373 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2374 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2375 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2376 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2377 } 2378 2379 C->assembled = PETSC_TRUE; 2380 C->preallocated = PETSC_TRUE; 2381 2382 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2383 if (sctx.nshift) { 2384 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2385 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2386 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2387 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2388 } 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 /* 2394 icc() under revised new data structure. 2395 Factored arrays bj and ba are stored as 2396 U(0,:),...,U(i,:),U(n-1,:) 2397 2398 ui=fact->i is an array of size n+1, in which 2399 ui+ 2400 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2401 ui[n]: points to U(n-1,n-1)+1 2402 2403 udiag=fact->diag is an array of size n,in which 2404 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2405 2406 U(i,:) contains udiag[i] as its last entry, i.e., 2407 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2408 */ 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2412 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2413 { 2414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2415 Mat_SeqSBAIJ *b; 2416 PetscErrorCode ierr; 2417 PetscBool perm_identity,missing; 2418 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2419 const PetscInt *rip,*riip; 2420 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2421 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2422 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2423 PetscReal fill =info->fill,levels=info->levels; 2424 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2425 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2426 PetscBT lnkbt; 2427 IS iperm; 2428 2429 PetscFunctionBegin; 2430 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2431 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2432 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2433 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2434 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2435 2436 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2437 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2438 ui[0] = 0; 2439 2440 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2441 if (!levels && perm_identity) { 2442 for (i=0; i<am; i++) { 2443 ncols = ai[i+1] - a->diag[i]; 2444 ui[i+1] = ui[i] + ncols; 2445 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2446 } 2447 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2448 cols = uj; 2449 for (i=0; i<am; i++) { 2450 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2451 ncols = ai[i+1] - a->diag[i] -1; 2452 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2453 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2454 } 2455 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2456 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2457 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2458 2459 /* initialization */ 2460 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2461 2462 /* jl: linked list for storing indices of the pivot rows 2463 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2464 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2465 for (i=0; i<am; i++) { 2466 jl[i] = am; il[i] = 0; 2467 } 2468 2469 /* create and initialize a linked list for storing column indices of the active row k */ 2470 nlnk = am + 1; 2471 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2472 2473 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2474 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2475 current_space = free_space; 2476 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2477 current_space_lvl = free_space_lvl; 2478 2479 for (k=0; k<am; k++) { /* for each active row k */ 2480 /* initialize lnk by the column indices of row rip[k] of A */ 2481 nzk = 0; 2482 ncols = ai[rip[k]+1] - ai[rip[k]]; 2483 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2484 ncols_upper = 0; 2485 for (j=0; j<ncols; j++) { 2486 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2487 if (riip[i] >= k) { /* only take upper triangular entry */ 2488 ajtmp[ncols_upper] = i; 2489 ncols_upper++; 2490 } 2491 } 2492 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2493 nzk += nlnk; 2494 2495 /* update lnk by computing fill-in for each pivot row to be merged in */ 2496 prow = jl[k]; /* 1st pivot row */ 2497 2498 while (prow < k) { 2499 nextprow = jl[prow]; 2500 2501 /* merge prow into k-th row */ 2502 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2503 jmax = ui[prow+1]; 2504 ncols = jmax-jmin; 2505 i = jmin - ui[prow]; 2506 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2507 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2508 j = *(uj - 1); 2509 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2510 nzk += nlnk; 2511 2512 /* update il and jl for prow */ 2513 if (jmin < jmax) { 2514 il[prow] = jmin; 2515 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2516 } 2517 prow = nextprow; 2518 } 2519 2520 /* if free space is not available, make more free space */ 2521 if (current_space->local_remaining<nzk) { 2522 i = am - k + 1; /* num of unfactored rows */ 2523 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2524 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2525 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2526 reallocs++; 2527 } 2528 2529 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2530 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2531 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2532 2533 /* add the k-th row into il and jl */ 2534 if (nzk > 1) { 2535 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2536 jl[k] = jl[i]; jl[i] = k; 2537 il[k] = ui[k] + 1; 2538 } 2539 uj_ptr[k] = current_space->array; 2540 uj_lvl_ptr[k] = current_space_lvl->array; 2541 2542 current_space->array += nzk; 2543 current_space->local_used += nzk; 2544 current_space->local_remaining -= nzk; 2545 2546 current_space_lvl->array += nzk; 2547 current_space_lvl->local_used += nzk; 2548 current_space_lvl->local_remaining -= nzk; 2549 2550 ui[k+1] = ui[k] + nzk; 2551 } 2552 2553 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2554 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2555 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2556 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2557 2558 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2559 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2560 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2561 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2562 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2563 2564 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2565 2566 /* put together the new matrix in MATSEQSBAIJ format */ 2567 b = (Mat_SeqSBAIJ*)(fact)->data; 2568 b->singlemalloc = PETSC_FALSE; 2569 2570 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2571 2572 b->j = uj; 2573 b->i = ui; 2574 b->diag = udiag; 2575 b->free_diag = PETSC_TRUE; 2576 b->ilen = 0; 2577 b->imax = 0; 2578 b->row = perm; 2579 b->col = perm; 2580 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2581 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2582 b->icol = iperm; 2583 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2584 2585 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2586 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2587 2588 b->maxnz = b->nz = ui[am]; 2589 b->free_a = PETSC_TRUE; 2590 b->free_ij = PETSC_TRUE; 2591 2592 fact->info.factor_mallocs = reallocs; 2593 fact->info.fill_ratio_given = fill; 2594 if (ai[am] != 0) { 2595 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2596 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2597 } else { 2598 fact->info.fill_ratio_needed = 0.0; 2599 } 2600 #if defined(PETSC_USE_INFO) 2601 if (ai[am] != 0) { 2602 PetscReal af = fact->info.fill_ratio_needed; 2603 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2604 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2605 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2606 } else { 2607 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2608 } 2609 #endif 2610 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2616 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2617 { 2618 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2619 Mat_SeqSBAIJ *b; 2620 PetscErrorCode ierr; 2621 PetscBool perm_identity,missing; 2622 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2623 const PetscInt *rip,*riip; 2624 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2625 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2626 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2627 PetscReal fill =info->fill,levels=info->levels; 2628 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2629 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2630 PetscBT lnkbt; 2631 IS iperm; 2632 2633 PetscFunctionBegin; 2634 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2635 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2636 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2637 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2638 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2639 2640 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2641 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2642 ui[0] = 0; 2643 2644 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2645 if (!levels && perm_identity) { 2646 2647 for (i=0; i<am; i++) { 2648 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2649 udiag[i] = ui[i]; 2650 } 2651 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2652 cols = uj; 2653 for (i=0; i<am; i++) { 2654 aj = a->j + a->diag[i]; 2655 ncols = ui[i+1] - ui[i]; 2656 for (j=0; j<ncols; j++) *cols++ = *aj++; 2657 } 2658 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2659 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2660 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2661 2662 /* initialization */ 2663 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2664 2665 /* jl: linked list for storing indices of the pivot rows 2666 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2667 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2668 for (i=0; i<am; i++) { 2669 jl[i] = am; il[i] = 0; 2670 } 2671 2672 /* create and initialize a linked list for storing column indices of the active row k */ 2673 nlnk = am + 1; 2674 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2675 2676 /* initial FreeSpace size is fill*(ai[am]+1) */ 2677 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2678 current_space = free_space; 2679 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr); 2680 current_space_lvl = free_space_lvl; 2681 2682 for (k=0; k<am; k++) { /* for each active row k */ 2683 /* initialize lnk by the column indices of row rip[k] of A */ 2684 nzk = 0; 2685 ncols = ai[rip[k]+1] - ai[rip[k]]; 2686 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2687 ncols_upper = 0; 2688 for (j=0; j<ncols; j++) { 2689 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2690 if (riip[i] >= k) { /* only take upper triangular entry */ 2691 ajtmp[ncols_upper] = i; 2692 ncols_upper++; 2693 } 2694 } 2695 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2696 nzk += nlnk; 2697 2698 /* update lnk by computing fill-in for each pivot row to be merged in */ 2699 prow = jl[k]; /* 1st pivot row */ 2700 2701 while (prow < k) { 2702 nextprow = jl[prow]; 2703 2704 /* merge prow into k-th row */ 2705 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2706 jmax = ui[prow+1]; 2707 ncols = jmax-jmin; 2708 i = jmin - ui[prow]; 2709 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2710 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2711 j = *(uj - 1); 2712 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2713 nzk += nlnk; 2714 2715 /* update il and jl for prow */ 2716 if (jmin < jmax) { 2717 il[prow] = jmin; 2718 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2719 } 2720 prow = nextprow; 2721 } 2722 2723 /* if free space is not available, make more free space */ 2724 if (current_space->local_remaining<nzk) { 2725 i = am - k + 1; /* num of unfactored rows */ 2726 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2727 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2728 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2729 reallocs++; 2730 } 2731 2732 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2733 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2734 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2735 2736 /* add the k-th row into il and jl */ 2737 if (nzk > 1) { 2738 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2739 jl[k] = jl[i]; jl[i] = k; 2740 il[k] = ui[k] + 1; 2741 } 2742 uj_ptr[k] = current_space->array; 2743 uj_lvl_ptr[k] = current_space_lvl->array; 2744 2745 current_space->array += nzk; 2746 current_space->local_used += nzk; 2747 current_space->local_remaining -= nzk; 2748 2749 current_space_lvl->array += nzk; 2750 current_space_lvl->local_used += nzk; 2751 current_space_lvl->local_remaining -= nzk; 2752 2753 ui[k+1] = ui[k] + nzk; 2754 } 2755 2756 #if defined(PETSC_USE_INFO) 2757 if (ai[am] != 0) { 2758 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2759 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2760 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2761 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2762 } else { 2763 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2764 } 2765 #endif 2766 2767 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2768 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2769 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2770 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2771 2772 /* destroy list of free space and other temporary array(s) */ 2773 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2774 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2775 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2776 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2777 2778 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2779 2780 /* put together the new matrix in MATSEQSBAIJ format */ 2781 2782 b = (Mat_SeqSBAIJ*)fact->data; 2783 b->singlemalloc = PETSC_FALSE; 2784 2785 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2786 2787 b->j = uj; 2788 b->i = ui; 2789 b->diag = udiag; 2790 b->free_diag = PETSC_TRUE; 2791 b->ilen = 0; 2792 b->imax = 0; 2793 b->row = perm; 2794 b->col = perm; 2795 2796 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2797 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2798 2799 b->icol = iperm; 2800 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2801 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2802 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2803 b->maxnz = b->nz = ui[am]; 2804 b->free_a = PETSC_TRUE; 2805 b->free_ij = PETSC_TRUE; 2806 2807 fact->info.factor_mallocs = reallocs; 2808 fact->info.fill_ratio_given = fill; 2809 if (ai[am] != 0) { 2810 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2811 } else { 2812 fact->info.fill_ratio_needed = 0.0; 2813 } 2814 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2815 PetscFunctionReturn(0); 2816 } 2817 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2820 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2821 { 2822 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2823 Mat_SeqSBAIJ *b; 2824 PetscErrorCode ierr; 2825 PetscBool perm_identity,missing; 2826 PetscReal fill = info->fill; 2827 const PetscInt *rip,*riip; 2828 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2829 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2830 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2831 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2832 PetscBT lnkbt; 2833 IS iperm; 2834 2835 PetscFunctionBegin; 2836 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2837 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2838 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2839 2840 /* check whether perm is the identity mapping */ 2841 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2842 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2843 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2844 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2845 2846 /* initialization */ 2847 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2848 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2849 ui[0] = 0; 2850 2851 /* jl: linked list for storing indices of the pivot rows 2852 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2853 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2854 for (i=0; i<am; i++) { 2855 jl[i] = am; il[i] = 0; 2856 } 2857 2858 /* create and initialize a linked list for storing column indices of the active row k */ 2859 nlnk = am + 1; 2860 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2861 2862 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2863 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2864 current_space = free_space; 2865 2866 for (k=0; k<am; k++) { /* for each active row k */ 2867 /* initialize lnk by the column indices of row rip[k] of A */ 2868 nzk = 0; 2869 ncols = ai[rip[k]+1] - ai[rip[k]]; 2870 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2871 ncols_upper = 0; 2872 for (j=0; j<ncols; j++) { 2873 i = riip[*(aj + ai[rip[k]] + j)]; 2874 if (i >= k) { /* only take upper triangular entry */ 2875 cols[ncols_upper] = i; 2876 ncols_upper++; 2877 } 2878 } 2879 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2880 nzk += nlnk; 2881 2882 /* update lnk by computing fill-in for each pivot row to be merged in */ 2883 prow = jl[k]; /* 1st pivot row */ 2884 2885 while (prow < k) { 2886 nextprow = jl[prow]; 2887 /* merge prow into k-th row */ 2888 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2889 jmax = ui[prow+1]; 2890 ncols = jmax-jmin; 2891 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2892 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2893 nzk += nlnk; 2894 2895 /* update il and jl for prow */ 2896 if (jmin < jmax) { 2897 il[prow] = jmin; 2898 j = *uj_ptr; 2899 jl[prow] = jl[j]; 2900 jl[j] = prow; 2901 } 2902 prow = nextprow; 2903 } 2904 2905 /* if free space is not available, make more free space */ 2906 if (current_space->local_remaining<nzk) { 2907 i = am - k + 1; /* num of unfactored rows */ 2908 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2909 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2910 reallocs++; 2911 } 2912 2913 /* copy data into free space, then initialize lnk */ 2914 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2915 2916 /* add the k-th row into il and jl */ 2917 if (nzk > 1) { 2918 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2919 jl[k] = jl[i]; jl[i] = k; 2920 il[k] = ui[k] + 1; 2921 } 2922 ui_ptr[k] = current_space->array; 2923 2924 current_space->array += nzk; 2925 current_space->local_used += nzk; 2926 current_space->local_remaining -= nzk; 2927 2928 ui[k+1] = ui[k] + nzk; 2929 } 2930 2931 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2932 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2933 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2934 2935 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2936 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2937 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2938 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2939 2940 /* put together the new matrix in MATSEQSBAIJ format */ 2941 2942 b = (Mat_SeqSBAIJ*)fact->data; 2943 b->singlemalloc = PETSC_FALSE; 2944 b->free_a = PETSC_TRUE; 2945 b->free_ij = PETSC_TRUE; 2946 2947 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2948 2949 b->j = uj; 2950 b->i = ui; 2951 b->diag = udiag; 2952 b->free_diag = PETSC_TRUE; 2953 b->ilen = 0; 2954 b->imax = 0; 2955 b->row = perm; 2956 b->col = perm; 2957 2958 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2959 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2960 2961 b->icol = iperm; 2962 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2963 2964 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2965 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2966 2967 b->maxnz = b->nz = ui[am]; 2968 2969 fact->info.factor_mallocs = reallocs; 2970 fact->info.fill_ratio_given = fill; 2971 if (ai[am] != 0) { 2972 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2973 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2974 } else { 2975 fact->info.fill_ratio_needed = 0.0; 2976 } 2977 #if defined(PETSC_USE_INFO) 2978 if (ai[am] != 0) { 2979 PetscReal af = fact->info.fill_ratio_needed; 2980 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2981 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2982 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2983 } else { 2984 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2985 } 2986 #endif 2987 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2993 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2994 { 2995 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2996 Mat_SeqSBAIJ *b; 2997 PetscErrorCode ierr; 2998 PetscBool perm_identity,missing; 2999 PetscReal fill = info->fill; 3000 const PetscInt *rip,*riip; 3001 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3002 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3003 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3004 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3005 PetscBT lnkbt; 3006 IS iperm; 3007 3008 PetscFunctionBegin; 3009 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3010 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3011 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3012 3013 /* check whether perm is the identity mapping */ 3014 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3015 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3016 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3017 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3018 3019 /* initialization */ 3020 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 3021 ui[0] = 0; 3022 3023 /* jl: linked list for storing indices of the pivot rows 3024 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3025 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 3026 for (i=0; i<am; i++) { 3027 jl[i] = am; il[i] = 0; 3028 } 3029 3030 /* create and initialize a linked list for storing column indices of the active row k */ 3031 nlnk = am + 1; 3032 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3033 3034 /* initial FreeSpace size is fill*(ai[am]+1) */ 3035 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 3036 current_space = free_space; 3037 3038 for (k=0; k<am; k++) { /* for each active row k */ 3039 /* initialize lnk by the column indices of row rip[k] of A */ 3040 nzk = 0; 3041 ncols = ai[rip[k]+1] - ai[rip[k]]; 3042 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3043 ncols_upper = 0; 3044 for (j=0; j<ncols; j++) { 3045 i = riip[*(aj + ai[rip[k]] + j)]; 3046 if (i >= k) { /* only take upper triangular entry */ 3047 cols[ncols_upper] = i; 3048 ncols_upper++; 3049 } 3050 } 3051 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3052 nzk += nlnk; 3053 3054 /* update lnk by computing fill-in for each pivot row to be merged in */ 3055 prow = jl[k]; /* 1st pivot row */ 3056 3057 while (prow < k) { 3058 nextprow = jl[prow]; 3059 /* merge prow into k-th row */ 3060 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3061 jmax = ui[prow+1]; 3062 ncols = jmax-jmin; 3063 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3064 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3065 nzk += nlnk; 3066 3067 /* update il and jl for prow */ 3068 if (jmin < jmax) { 3069 il[prow] = jmin; 3070 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3071 } 3072 prow = nextprow; 3073 } 3074 3075 /* if free space is not available, make more free space */ 3076 if (current_space->local_remaining<nzk) { 3077 i = am - k + 1; /* num of unfactored rows */ 3078 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3079 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3080 reallocs++; 3081 } 3082 3083 /* copy data into free space, then initialize lnk */ 3084 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3085 3086 /* add the k-th row into il and jl */ 3087 if (nzk-1 > 0) { 3088 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3089 jl[k] = jl[i]; jl[i] = k; 3090 il[k] = ui[k] + 1; 3091 } 3092 ui_ptr[k] = current_space->array; 3093 3094 current_space->array += nzk; 3095 current_space->local_used += nzk; 3096 current_space->local_remaining -= nzk; 3097 3098 ui[k+1] = ui[k] + nzk; 3099 } 3100 3101 #if defined(PETSC_USE_INFO) 3102 if (ai[am] != 0) { 3103 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3104 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3105 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3106 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3107 } else { 3108 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3109 } 3110 #endif 3111 3112 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3113 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3114 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3115 3116 /* destroy list of free space and other temporary array(s) */ 3117 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 3118 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3119 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3120 3121 /* put together the new matrix in MATSEQSBAIJ format */ 3122 3123 b = (Mat_SeqSBAIJ*)fact->data; 3124 b->singlemalloc = PETSC_FALSE; 3125 b->free_a = PETSC_TRUE; 3126 b->free_ij = PETSC_TRUE; 3127 3128 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 3129 3130 b->j = uj; 3131 b->i = ui; 3132 b->diag = 0; 3133 b->ilen = 0; 3134 b->imax = 0; 3135 b->row = perm; 3136 b->col = perm; 3137 3138 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3139 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3140 3141 b->icol = iperm; 3142 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3143 3144 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 3145 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3146 b->maxnz = b->nz = ui[am]; 3147 3148 fact->info.factor_mallocs = reallocs; 3149 fact->info.fill_ratio_given = fill; 3150 if (ai[am] != 0) { 3151 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3152 } else { 3153 fact->info.fill_ratio_needed = 0.0; 3154 } 3155 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3156 PetscFunctionReturn(0); 3157 } 3158 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3161 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3162 { 3163 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3164 PetscErrorCode ierr; 3165 PetscInt n = A->rmap->n; 3166 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3167 PetscScalar *x,sum; 3168 const PetscScalar *b; 3169 const MatScalar *aa = a->a,*v; 3170 PetscInt i,nz; 3171 3172 PetscFunctionBegin; 3173 if (!n) PetscFunctionReturn(0); 3174 3175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3176 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3177 3178 /* forward solve the lower triangular */ 3179 x[0] = b[0]; 3180 v = aa; 3181 vi = aj; 3182 for (i=1; i<n; i++) { 3183 nz = ai[i+1] - ai[i]; 3184 sum = b[i]; 3185 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3186 v += nz; 3187 vi += nz; 3188 x[i] = sum; 3189 } 3190 3191 /* backward solve the upper triangular */ 3192 for (i=n-1; i>=0; i--) { 3193 v = aa + adiag[i+1] + 1; 3194 vi = aj + adiag[i+1] + 1; 3195 nz = adiag[i] - adiag[i+1]-1; 3196 sum = x[i]; 3197 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3198 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3199 } 3200 3201 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3202 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3204 PetscFunctionReturn(0); 3205 } 3206 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "MatSolve_SeqAIJ" 3209 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3210 { 3211 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3212 IS iscol = a->col,isrow = a->row; 3213 PetscErrorCode ierr; 3214 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3215 const PetscInt *rout,*cout,*r,*c; 3216 PetscScalar *x,*tmp,sum; 3217 const PetscScalar *b; 3218 const MatScalar *aa = a->a,*v; 3219 3220 PetscFunctionBegin; 3221 if (!n) PetscFunctionReturn(0); 3222 3223 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3224 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3225 tmp = a->solve_work; 3226 3227 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3228 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3229 3230 /* forward solve the lower triangular */ 3231 tmp[0] = b[r[0]]; 3232 v = aa; 3233 vi = aj; 3234 for (i=1; i<n; i++) { 3235 nz = ai[i+1] - ai[i]; 3236 sum = b[r[i]]; 3237 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3238 tmp[i] = sum; 3239 v += nz; vi += nz; 3240 } 3241 3242 /* backward solve the upper triangular */ 3243 for (i=n-1; i>=0; i--) { 3244 v = aa + adiag[i+1]+1; 3245 vi = aj + adiag[i+1]+1; 3246 nz = adiag[i]-adiag[i+1]-1; 3247 sum = tmp[i]; 3248 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3249 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3250 } 3251 3252 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3253 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3254 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3255 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3256 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3262 /* 3263 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3264 */ 3265 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3266 { 3267 Mat B = *fact; 3268 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3269 IS isicol; 3270 PetscErrorCode ierr; 3271 const PetscInt *r,*ic; 3272 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3273 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3274 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3275 PetscInt nlnk,*lnk; 3276 PetscBT lnkbt; 3277 PetscBool row_identity,icol_identity; 3278 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3279 const PetscInt *ics; 3280 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3281 PetscReal dt =info->dt,shift=info->shiftamount; 3282 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3283 PetscBool missing; 3284 3285 PetscFunctionBegin; 3286 if (dt == PETSC_DEFAULT) dt = 0.005; 3287 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3288 3289 /* ------- symbolic factorization, can be reused ---------*/ 3290 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3291 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3292 adiag=a->diag; 3293 3294 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3295 3296 /* bdiag is location of diagonal in factor */ 3297 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3298 ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3299 3300 /* allocate row pointers bi */ 3301 ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr); 3302 3303 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3304 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3305 nnz_max = ai[n]+2*n*dtcount+2; 3306 3307 ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr); 3308 ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr); 3309 3310 /* put together the new matrix */ 3311 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3312 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3313 b = (Mat_SeqAIJ*)B->data; 3314 3315 b->free_a = PETSC_TRUE; 3316 b->free_ij = PETSC_TRUE; 3317 b->singlemalloc = PETSC_FALSE; 3318 3319 b->a = ba; 3320 b->j = bj; 3321 b->i = bi; 3322 b->diag = bdiag; 3323 b->ilen = 0; 3324 b->imax = 0; 3325 b->row = isrow; 3326 b->col = iscol; 3327 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3328 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3329 b->icol = isicol; 3330 3331 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 3332 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3333 b->maxnz = nnz_max; 3334 3335 B->factortype = MAT_FACTOR_ILUDT; 3336 B->info.factor_mallocs = 0; 3337 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3338 /* ------- end of symbolic factorization ---------*/ 3339 3340 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3341 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3342 ics = ic; 3343 3344 /* linked list for storing column indices of the active row */ 3345 nlnk = n + 1; 3346 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3347 3348 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3349 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3350 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3351 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3352 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3353 3354 bi[0] = 0; 3355 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3356 bdiag_rev[n] = bdiag[0]; 3357 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3358 for (i=0; i<n; i++) { 3359 /* copy initial fill into linked list */ 3360 nzi = ai[r[i]+1] - ai[r[i]]; 3361 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3362 nzi_al = adiag[r[i]] - ai[r[i]]; 3363 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3364 ajtmp = aj + ai[r[i]]; 3365 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3366 3367 /* load in initial (unfactored row) */ 3368 aatmp = a->a + ai[r[i]]; 3369 for (j=0; j<nzi; j++) { 3370 rtmp[ics[*ajtmp++]] = *aatmp++; 3371 } 3372 3373 /* add pivot rows into linked list */ 3374 row = lnk[n]; 3375 while (row < i) { 3376 nzi_bl = bi[row+1] - bi[row] + 1; 3377 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3378 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3379 nzi += nlnk; 3380 row = lnk[row]; 3381 } 3382 3383 /* copy data from lnk into jtmp, then initialize lnk */ 3384 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3385 3386 /* numerical factorization */ 3387 bjtmp = jtmp; 3388 row = *bjtmp++; /* 1st pivot row */ 3389 while (row < i) { 3390 pc = rtmp + row; 3391 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3392 multiplier = (*pc) * (*pv); 3393 *pc = multiplier; 3394 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3395 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3396 pv = ba + bdiag[row+1] + 1; 3397 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3398 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3399 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3400 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3401 } 3402 row = *bjtmp++; 3403 } 3404 3405 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3406 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3407 nzi_bl = 0; j = 0; 3408 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3409 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3410 nzi_bl++; j++; 3411 } 3412 nzi_bu = nzi - nzi_bl -1; 3413 while (j < nzi) { 3414 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3415 j++; 3416 } 3417 3418 bjtmp = bj + bi[i]; 3419 batmp = ba + bi[i]; 3420 /* apply level dropping rule to L part */ 3421 ncut = nzi_al + dtcount; 3422 if (ncut < nzi_bl) { 3423 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3424 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3425 } else { 3426 ncut = nzi_bl; 3427 } 3428 for (j=0; j<ncut; j++) { 3429 bjtmp[j] = jtmp[j]; 3430 batmp[j] = vtmp[j]; 3431 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3432 } 3433 bi[i+1] = bi[i] + ncut; 3434 nzi = ncut + 1; 3435 3436 /* apply level dropping rule to U part */ 3437 ncut = nzi_au + dtcount; 3438 if (ncut < nzi_bu) { 3439 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3440 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3441 } else { 3442 ncut = nzi_bu; 3443 } 3444 nzi += ncut; 3445 3446 /* mark bdiagonal */ 3447 bdiag[i+1] = bdiag[i] - (ncut + 1); 3448 bdiag_rev[n-i-1] = bdiag[i+1]; 3449 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3450 bjtmp = bj + bdiag[i]; 3451 batmp = ba + bdiag[i]; 3452 *bjtmp = i; 3453 *batmp = diag_tmp; /* rtmp[i]; */ 3454 if (*batmp == 0.0) { 3455 *batmp = dt+shift; 3456 /* printf(" row %d add shift %g\n",i,shift); */ 3457 } 3458 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3459 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3460 3461 bjtmp = bj + bdiag[i+1]+1; 3462 batmp = ba + bdiag[i+1]+1; 3463 for (k=0; k<ncut; k++) { 3464 bjtmp[k] = jtmp[nzi_bl+1+k]; 3465 batmp[k] = vtmp[nzi_bl+1+k]; 3466 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3467 } 3468 /* printf("\n"); */ 3469 3470 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3471 /* 3472 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3473 printf(" ----------------------------\n"); 3474 */ 3475 } /* for (i=0; i<n; i++) */ 3476 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3477 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3478 3479 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3480 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3481 3482 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3483 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3484 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3485 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3486 3487 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3488 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3489 3490 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3491 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3492 if (row_identity && icol_identity) { 3493 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3494 } else { 3495 B->ops->solve = MatSolve_SeqAIJ; 3496 } 3497 3498 B->ops->solveadd = 0; 3499 B->ops->solvetranspose = 0; 3500 B->ops->solvetransposeadd = 0; 3501 B->ops->matsolve = 0; 3502 B->assembled = PETSC_TRUE; 3503 B->preallocated = PETSC_TRUE; 3504 PetscFunctionReturn(0); 3505 } 3506 3507 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3510 /* 3511 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3512 */ 3513 3514 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3515 { 3516 PetscErrorCode ierr; 3517 3518 PetscFunctionBegin; 3519 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 3523 /* 3524 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3525 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3526 */ 3527 #undef __FUNCT__ 3528 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3529 /* 3530 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3531 */ 3532 3533 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3534 { 3535 Mat C =fact; 3536 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3537 IS isrow = b->row,isicol = b->icol; 3538 PetscErrorCode ierr; 3539 const PetscInt *r,*ic,*ics; 3540 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3541 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3542 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3543 PetscReal dt=info->dt,shift=info->shiftamount; 3544 PetscBool row_identity, col_identity; 3545 3546 PetscFunctionBegin; 3547 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3548 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3549 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 3550 ics = ic; 3551 3552 for (i=0; i<n; i++) { 3553 /* initialize rtmp array */ 3554 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3555 bjtmp = bj + bi[i]; 3556 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3557 rtmp[i] = 0.0; 3558 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3559 bjtmp = bj + bdiag[i+1] + 1; 3560 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3561 3562 /* load in initial unfactored row of A */ 3563 /* printf("row %d\n",i); */ 3564 nz = ai[r[i]+1] - ai[r[i]]; 3565 ajtmp = aj + ai[r[i]]; 3566 v = aa + ai[r[i]]; 3567 for (j=0; j<nz; j++) { 3568 rtmp[ics[*ajtmp++]] = v[j]; 3569 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3570 } 3571 /* printf("\n"); */ 3572 3573 /* numerical factorization */ 3574 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3575 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3576 k = 0; 3577 while (k < nzl) { 3578 row = *bjtmp++; 3579 /* printf(" prow %d\n",row); */ 3580 pc = rtmp + row; 3581 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3582 multiplier = (*pc) * (*pv); 3583 *pc = multiplier; 3584 if (PetscAbsScalar(multiplier) > dt) { 3585 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3586 pv = b->a + bdiag[row+1] + 1; 3587 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3588 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3589 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3590 } 3591 k++; 3592 } 3593 3594 /* finished row so stick it into b->a */ 3595 /* L-part */ 3596 pv = b->a + bi[i]; 3597 pj = bj + bi[i]; 3598 nzl = bi[i+1] - bi[i]; 3599 for (j=0; j<nzl; j++) { 3600 pv[j] = rtmp[pj[j]]; 3601 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3602 } 3603 3604 /* diagonal: invert diagonal entries for simplier triangular solves */ 3605 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3606 b->a[bdiag[i]] = 1.0/rtmp[i]; 3607 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3608 3609 /* U-part */ 3610 pv = b->a + bdiag[i+1] + 1; 3611 pj = bj + bdiag[i+1] + 1; 3612 nzu = bdiag[i] - bdiag[i+1] - 1; 3613 for (j=0; j<nzu; j++) { 3614 pv[j] = rtmp[pj[j]]; 3615 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3616 } 3617 /* printf("\n"); */ 3618 } 3619 3620 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3621 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3622 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3623 3624 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3625 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3626 if (row_identity && col_identity) { 3627 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3628 } else { 3629 C->ops->solve = MatSolve_SeqAIJ; 3630 } 3631 C->ops->solveadd = 0; 3632 C->ops->solvetranspose = 0; 3633 C->ops->solvetransposeadd = 0; 3634 C->ops->matsolve = 0; 3635 C->assembled = PETSC_TRUE; 3636 C->preallocated = PETSC_TRUE; 3637 3638 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3639 PetscFunctionReturn(0); 3640 } 3641