1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 9 /* 10 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 11 12 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 13 */ 14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscBool *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0.0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 31 else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 96 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 97 { 98 PetscFunctionBegin; 99 *flg = PETSC_TRUE; 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 105 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 106 { 107 PetscInt n = A->rmap->n; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 #if defined(PETSC_USE_COMPLEX) 112 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 113 #endif 114 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 115 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 116 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 117 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 118 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 122 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 123 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 124 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 125 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 126 127 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 128 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 129 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 130 (*B)->factortype = ftype; 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 139 IS isicol; 140 PetscErrorCode ierr; 141 const PetscInt *r,*ic; 142 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 143 PetscInt *bi,*bj,*ajtmp; 144 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 145 PetscReal f; 146 PetscInt nlnk,*lnk,k,**bi_ptr; 147 PetscFreeSpaceList free_space=NULL,current_space=NULL; 148 PetscBT lnkbt; 149 PetscBool missing; 150 151 PetscFunctionBegin; 152 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 153 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 154 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 155 156 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 157 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 158 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 159 160 /* get new row pointers */ 161 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 162 bi[0] = 0; 163 164 /* bdiag is location of diagonal in factor */ 165 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 166 bdiag[0] = 0; 167 168 /* linked list for storing column indices of the active row */ 169 nlnk = n + 1; 170 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 171 172 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 173 174 /* initial FreeSpace size is f*(ai[n]+1) */ 175 f = info->fill; 176 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 177 current_space = free_space; 178 179 for (i=0; i<n; i++) { 180 /* copy previous fill into linked list */ 181 nzi = 0; 182 nnz = ai[r[i]+1] - ai[r[i]]; 183 ajtmp = aj + ai[r[i]]; 184 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 185 nzi += nlnk; 186 187 /* add pivot rows into linked list */ 188 row = lnk[n]; 189 while (row < i) { 190 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 191 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 192 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 193 nzi += nlnk; 194 row = lnk[row]; 195 } 196 bi[i+1] = bi[i] + nzi; 197 im[i] = nzi; 198 199 /* mark bdiag */ 200 nzbd = 0; 201 nnz = nzi; 202 k = lnk[n]; 203 while (nnz-- && k < i) { 204 nzbd++; 205 k = lnk[k]; 206 } 207 bdiag[i] = bi[i] + nzbd; 208 209 /* if free space is not available, make more free space */ 210 if (current_space->local_remaining<nzi) { 211 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 212 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 213 reallocs++; 214 } 215 216 /* copy data into free space, then initialize lnk */ 217 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 218 219 bi_ptr[i] = current_space->array; 220 current_space->array += nzi; 221 current_space->local_used += nzi; 222 current_space->local_remaining -= nzi; 223 } 224 #if defined(PETSC_USE_INFO) 225 if (ai[n] != 0) { 226 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 227 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 228 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 229 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 230 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 231 } else { 232 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 233 } 234 #endif 235 236 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 237 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 238 239 /* destroy list of free space and other temporary array(s) */ 240 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 241 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 242 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 243 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 244 245 /* put together the new matrix */ 246 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 247 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 248 b = (Mat_SeqAIJ*)(B)->data; 249 250 b->free_a = PETSC_TRUE; 251 b->free_ij = PETSC_TRUE; 252 b->singlemalloc = PETSC_FALSE; 253 254 ierr = PetscMalloc1((bi[n]+1),&b->a);CHKERRQ(ierr); 255 b->j = bj; 256 b->i = bi; 257 b->diag = bdiag; 258 b->ilen = 0; 259 b->imax = 0; 260 b->row = isrow; 261 b->col = iscol; 262 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 263 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 264 b->icol = isicol; 265 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 266 267 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 268 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 269 b->maxnz = b->nz = bi[n]; 270 271 (B)->factortype = MAT_FACTOR_LU; 272 (B)->info.factor_mallocs = reallocs; 273 (B)->info.fill_ratio_given = f; 274 275 if (ai[n]) { 276 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 277 } else { 278 (B)->info.fill_ratio_needed = 0.0; 279 } 280 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 281 if (a->inode.size) { 282 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 283 } 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 289 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 290 { 291 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 292 IS isicol; 293 PetscErrorCode ierr; 294 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 295 PetscInt i,n=A->rmap->n; 296 PetscInt *bi,*bj; 297 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 298 PetscReal f; 299 PetscInt nlnk,*lnk,k,**bi_ptr; 300 PetscFreeSpaceList free_space=NULL,current_space=NULL; 301 PetscBT lnkbt; 302 PetscBool missing; 303 304 PetscFunctionBegin; 305 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 306 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 307 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 308 309 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 310 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 311 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 312 313 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 314 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 315 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 316 bi[0] = bdiag[0] = 0; 317 318 /* linked list for storing column indices of the active row */ 319 nlnk = n + 1; 320 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 321 322 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 323 324 /* initial FreeSpace size is f*(ai[n]+1) */ 325 f = info->fill; 326 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 327 current_space = free_space; 328 329 for (i=0; i<n; i++) { 330 /* copy previous fill into linked list */ 331 nzi = 0; 332 nnz = ai[r[i]+1] - ai[r[i]]; 333 ajtmp = aj + ai[r[i]]; 334 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 335 nzi += nlnk; 336 337 /* add pivot rows into linked list */ 338 row = lnk[n]; 339 while (row < i) { 340 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 341 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 342 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 343 nzi += nlnk; 344 row = lnk[row]; 345 } 346 bi[i+1] = bi[i] + nzi; 347 im[i] = nzi; 348 349 /* mark bdiag */ 350 nzbd = 0; 351 nnz = nzi; 352 k = lnk[n]; 353 while (nnz-- && k < i) { 354 nzbd++; 355 k = lnk[k]; 356 } 357 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 358 359 /* if free space is not available, make more free space */ 360 if (current_space->local_remaining<nzi) { 361 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 362 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 363 reallocs++; 364 } 365 366 /* copy data into free space, then initialize lnk */ 367 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 368 369 bi_ptr[i] = current_space->array; 370 current_space->array += nzi; 371 current_space->local_used += nzi; 372 current_space->local_remaining -= nzi; 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 379 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 380 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 381 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 382 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 383 384 /* put together the new matrix */ 385 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 386 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 387 b = (Mat_SeqAIJ*)(B)->data; 388 389 b->free_a = PETSC_TRUE; 390 b->free_ij = PETSC_TRUE; 391 b->singlemalloc = PETSC_FALSE; 392 393 ierr = PetscMalloc1((bdiag[0]+1),&b->a);CHKERRQ(ierr); 394 395 b->j = bj; 396 b->i = bi; 397 b->diag = bdiag; 398 b->ilen = 0; 399 b->imax = 0; 400 b->row = isrow; 401 b->col = iscol; 402 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 403 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 404 b->icol = isicol; 405 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 406 407 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 408 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 409 b->maxnz = b->nz = bdiag[0]+1; 410 411 B->factortype = MAT_FACTOR_LU; 412 B->info.factor_mallocs = reallocs; 413 B->info.fill_ratio_given = f; 414 415 if (ai[n]) { 416 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 417 } else { 418 B->info.fill_ratio_needed = 0.0; 419 } 420 #if defined(PETSC_USE_INFO) 421 if (ai[n] != 0) { 422 PetscReal af = B->info.fill_ratio_needed; 423 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 424 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 425 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 426 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 427 } else { 428 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 429 } 430 #endif 431 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 432 if (a->inode.size) { 433 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 434 } 435 ierr = Mat_CheckInode_FactorLU(B);CHKERRQ(ierr); 436 PetscFunctionReturn(0); 437 } 438 439 /* 440 Trouble in factorization, should we dump the original matrix? 441 */ 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatFactorDumpMatrix" 444 PetscErrorCode MatFactorDumpMatrix(Mat A) 445 { 446 PetscErrorCode ierr; 447 PetscBool flg = PETSC_FALSE; 448 449 PetscFunctionBegin; 450 ierr = PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 451 if (flg) { 452 PetscViewer viewer; 453 char filename[PETSC_MAX_PATH_LEN]; 454 455 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 456 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 457 ierr = MatView(A,viewer);CHKERRQ(ierr); 458 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 459 } 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 465 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 466 { 467 Mat C =B; 468 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 469 IS isrow = b->row,isicol = b->icol; 470 PetscErrorCode ierr; 471 const PetscInt *r,*ic,*ics; 472 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 473 PetscInt i,j,k,nz,nzL,row,*pj; 474 const PetscInt *ajtmp,*bjtmp; 475 MatScalar *rtmp,*pc,multiplier,*pv; 476 const MatScalar *aa=a->a,*v; 477 PetscBool row_identity,col_identity; 478 FactorShiftCtx sctx; 479 const PetscInt *ddiag; 480 PetscReal rs; 481 MatScalar d; 482 483 PetscFunctionBegin; 484 /* MatPivotSetUp(): initialize shift context sctx */ 485 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 486 487 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 488 ddiag = a->diag; 489 sctx.shift_top = info->zeropivot; 490 for (i=0; i<n; i++) { 491 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 492 d = (aa)[ddiag[i]]; 493 rs = -PetscAbsScalar(d) - PetscRealPart(d); 494 v = aa+ai[i]; 495 nz = ai[i+1] - ai[i]; 496 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 497 if (rs>sctx.shift_top) sctx.shift_top = rs; 498 } 499 sctx.shift_top *= 1.1; 500 sctx.nshift_max = 5; 501 sctx.shift_lo = 0.; 502 sctx.shift_hi = 1.; 503 } 504 505 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 506 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 507 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 508 ics = ic; 509 510 do { 511 sctx.newshift = PETSC_FALSE; 512 for (i=0; i<n; i++) { 513 /* zero rtmp */ 514 /* L part */ 515 nz = bi[i+1] - bi[i]; 516 bjtmp = bj + bi[i]; 517 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 518 519 /* U part */ 520 nz = bdiag[i]-bdiag[i+1]; 521 bjtmp = bj + bdiag[i+1]+1; 522 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 523 524 /* load in initial (unfactored row) */ 525 nz = ai[r[i]+1] - ai[r[i]]; 526 ajtmp = aj + ai[r[i]]; 527 v = aa + ai[r[i]]; 528 for (j=0; j<nz; j++) { 529 rtmp[ics[ajtmp[j]]] = v[j]; 530 } 531 /* ZeropivotApply() */ 532 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 533 534 /* elimination */ 535 bjtmp = bj + bi[i]; 536 row = *bjtmp++; 537 nzL = bi[i+1] - bi[i]; 538 for (k=0; k < nzL; k++) { 539 pc = rtmp + row; 540 if (*pc != 0.0) { 541 pv = b->a + bdiag[row]; 542 multiplier = *pc * (*pv); 543 *pc = multiplier; 544 545 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 546 pv = b->a + bdiag[row+1]+1; 547 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 548 549 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 550 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 551 } 552 row = *bjtmp++; 553 } 554 555 /* finished row so stick it into b->a */ 556 rs = 0.0; 557 /* L part */ 558 pv = b->a + bi[i]; 559 pj = b->j + bi[i]; 560 nz = bi[i+1] - bi[i]; 561 for (j=0; j<nz; j++) { 562 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 563 } 564 565 /* U part */ 566 pv = b->a + bdiag[i+1]+1; 567 pj = b->j + bdiag[i+1]+1; 568 nz = bdiag[i] - bdiag[i+1]-1; 569 for (j=0; j<nz; j++) { 570 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 571 } 572 573 sctx.rs = rs; 574 sctx.pv = rtmp[i]; 575 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 576 if (sctx.newshift) break; /* break for-loop */ 577 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 578 579 /* Mark diagonal and invert diagonal for simplier triangular solves */ 580 pv = b->a + bdiag[i]; 581 *pv = 1.0/rtmp[i]; 582 583 } /* endof for (i=0; i<n; i++) { */ 584 585 /* MatPivotRefine() */ 586 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 587 /* 588 * if no shift in this attempt & shifting & started shifting & can refine, 589 * then try lower shift 590 */ 591 sctx.shift_hi = sctx.shift_fraction; 592 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 593 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 594 sctx.newshift = PETSC_TRUE; 595 sctx.nshift++; 596 } 597 } while (sctx.newshift); 598 599 ierr = PetscFree(rtmp);CHKERRQ(ierr); 600 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 601 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 602 603 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 604 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 605 if (b->inode.size) { 606 C->ops->solve = MatSolve_SeqAIJ_Inode; 607 } else if (row_identity && col_identity) { 608 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 609 } else { 610 C->ops->solve = MatSolve_SeqAIJ; 611 } 612 C->ops->solveadd = MatSolveAdd_SeqAIJ; 613 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 614 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 615 C->ops->matsolve = MatMatSolve_SeqAIJ; 616 C->assembled = PETSC_TRUE; 617 C->preallocated = PETSC_TRUE; 618 619 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 620 621 /* MatShiftView(A,info,&sctx) */ 622 if (sctx.nshift) { 623 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 624 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 625 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 626 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 627 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 628 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 636 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 637 { 638 Mat C =B; 639 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 640 IS isrow = b->row,isicol = b->icol; 641 PetscErrorCode ierr; 642 const PetscInt *r,*ic,*ics; 643 PetscInt nz,row,i,j,n=A->rmap->n,diag; 644 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 645 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 646 MatScalar *pv,*rtmp,*pc,multiplier,d; 647 const MatScalar *v,*aa=a->a; 648 PetscReal rs=0.0; 649 FactorShiftCtx sctx; 650 const PetscInt *ddiag; 651 PetscBool row_identity, col_identity; 652 653 PetscFunctionBegin; 654 /* MatPivotSetUp(): initialize shift context sctx */ 655 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 656 657 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 658 ddiag = a->diag; 659 sctx.shift_top = info->zeropivot; 660 for (i=0; i<n; i++) { 661 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 662 d = (aa)[ddiag[i]]; 663 rs = -PetscAbsScalar(d) - PetscRealPart(d); 664 v = aa+ai[i]; 665 nz = ai[i+1] - ai[i]; 666 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 667 if (rs>sctx.shift_top) sctx.shift_top = rs; 668 } 669 sctx.shift_top *= 1.1; 670 sctx.nshift_max = 5; 671 sctx.shift_lo = 0.; 672 sctx.shift_hi = 1.; 673 } 674 675 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 676 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 677 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 678 ics = ic; 679 680 do { 681 sctx.newshift = PETSC_FALSE; 682 for (i=0; i<n; i++) { 683 nz = bi[i+1] - bi[i]; 684 bjtmp = bj + bi[i]; 685 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 686 687 /* load in initial (unfactored row) */ 688 nz = ai[r[i]+1] - ai[r[i]]; 689 ajtmp = aj + ai[r[i]]; 690 v = aa + ai[r[i]]; 691 for (j=0; j<nz; j++) { 692 rtmp[ics[ajtmp[j]]] = v[j]; 693 } 694 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 695 696 row = *bjtmp++; 697 while (row < i) { 698 pc = rtmp + row; 699 if (*pc != 0.0) { 700 pv = b->a + diag_offset[row]; 701 pj = b->j + diag_offset[row] + 1; 702 multiplier = *pc / *pv++; 703 *pc = multiplier; 704 nz = bi[row+1] - diag_offset[row] - 1; 705 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 706 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 707 } 708 row = *bjtmp++; 709 } 710 /* finished row so stick it into b->a */ 711 pv = b->a + bi[i]; 712 pj = b->j + bi[i]; 713 nz = bi[i+1] - bi[i]; 714 diag = diag_offset[i] - bi[i]; 715 rs = 0.0; 716 for (j=0; j<nz; j++) { 717 pv[j] = rtmp[pj[j]]; 718 rs += PetscAbsScalar(pv[j]); 719 } 720 rs -= PetscAbsScalar(pv[diag]); 721 722 sctx.rs = rs; 723 sctx.pv = pv[diag]; 724 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 725 if (sctx.newshift) break; 726 pv[diag] = sctx.pv; 727 } 728 729 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 730 /* 731 * if no shift in this attempt & shifting & started shifting & can refine, 732 * then try lower shift 733 */ 734 sctx.shift_hi = sctx.shift_fraction; 735 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 736 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 737 sctx.newshift = PETSC_TRUE; 738 sctx.nshift++; 739 } 740 } while (sctx.newshift); 741 742 /* invert diagonal entries for simplier triangular solves */ 743 for (i=0; i<n; i++) { 744 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 745 } 746 ierr = PetscFree(rtmp);CHKERRQ(ierr); 747 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 748 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 749 750 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 751 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 752 if (row_identity && col_identity) { 753 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 754 } else { 755 C->ops->solve = MatSolve_SeqAIJ_inplace; 756 } 757 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 758 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 759 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 760 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 761 762 C->assembled = PETSC_TRUE; 763 C->preallocated = PETSC_TRUE; 764 765 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 766 if (sctx.nshift) { 767 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 768 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 769 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 770 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 771 } 772 } 773 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 774 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 775 776 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 /* 781 This routine implements inplace ILU(0) with row or/and column permutations. 782 Input: 783 A - original matrix 784 Output; 785 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 786 a->j (col index) is permuted by the inverse of colperm, then sorted 787 a->a reordered accordingly with a->j 788 a->diag (ptr to diagonal elements) is updated. 789 */ 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 792 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 793 { 794 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 795 IS isrow = a->row,isicol = a->icol; 796 PetscErrorCode ierr; 797 const PetscInt *r,*ic,*ics; 798 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 799 PetscInt *ajtmp,nz,row; 800 PetscInt *diag = a->diag,nbdiag,*pj; 801 PetscScalar *rtmp,*pc,multiplier,d; 802 MatScalar *pv,*v; 803 PetscReal rs; 804 FactorShiftCtx sctx; 805 const MatScalar *aa=a->a,*vtmp; 806 807 PetscFunctionBegin; 808 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 809 810 /* MatPivotSetUp(): initialize shift context sctx */ 811 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 812 813 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 814 const PetscInt *ddiag = a->diag; 815 sctx.shift_top = info->zeropivot; 816 for (i=0; i<n; i++) { 817 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 818 d = (aa)[ddiag[i]]; 819 rs = -PetscAbsScalar(d) - PetscRealPart(d); 820 vtmp = aa+ai[i]; 821 nz = ai[i+1] - ai[i]; 822 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 823 if (rs>sctx.shift_top) sctx.shift_top = rs; 824 } 825 sctx.shift_top *= 1.1; 826 sctx.nshift_max = 5; 827 sctx.shift_lo = 0.; 828 sctx.shift_hi = 1.; 829 } 830 831 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 832 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 833 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 834 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 835 ics = ic; 836 837 #if defined(MV) 838 sctx.shift_top = 0.; 839 sctx.nshift_max = 0; 840 sctx.shift_lo = 0.; 841 sctx.shift_hi = 0.; 842 sctx.shift_fraction = 0.; 843 844 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 845 sctx.shift_top = 0.; 846 for (i=0; i<n; i++) { 847 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 848 d = (a->a)[diag[i]]; 849 rs = -PetscAbsScalar(d) - PetscRealPart(d); 850 v = a->a+ai[i]; 851 nz = ai[i+1] - ai[i]; 852 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 853 if (rs>sctx.shift_top) sctx.shift_top = rs; 854 } 855 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 856 sctx.shift_top *= 1.1; 857 sctx.nshift_max = 5; 858 sctx.shift_lo = 0.; 859 sctx.shift_hi = 1.; 860 } 861 862 sctx.shift_amount = 0.; 863 sctx.nshift = 0; 864 #endif 865 866 do { 867 sctx.newshift = PETSC_FALSE; 868 for (i=0; i<n; i++) { 869 /* load in initial unfactored row */ 870 nz = ai[r[i]+1] - ai[r[i]]; 871 ajtmp = aj + ai[r[i]]; 872 v = a->a + ai[r[i]]; 873 /* sort permuted ajtmp and values v accordingly */ 874 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 875 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 876 877 diag[r[i]] = ai[r[i]]; 878 for (j=0; j<nz; j++) { 879 rtmp[ajtmp[j]] = v[j]; 880 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 881 } 882 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 883 884 row = *ajtmp++; 885 while (row < i) { 886 pc = rtmp + row; 887 if (*pc != 0.0) { 888 pv = a->a + diag[r[row]]; 889 pj = aj + diag[r[row]] + 1; 890 891 multiplier = *pc / *pv++; 892 *pc = multiplier; 893 nz = ai[r[row]+1] - diag[r[row]] - 1; 894 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 895 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 896 } 897 row = *ajtmp++; 898 } 899 /* finished row so overwrite it onto a->a */ 900 pv = a->a + ai[r[i]]; 901 pj = aj + ai[r[i]]; 902 nz = ai[r[i]+1] - ai[r[i]]; 903 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 904 905 rs = 0.0; 906 for (j=0; j<nz; j++) { 907 pv[j] = rtmp[pj[j]]; 908 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 909 } 910 911 sctx.rs = rs; 912 sctx.pv = pv[nbdiag]; 913 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 914 if (sctx.newshift) break; 915 pv[nbdiag] = sctx.pv; 916 } 917 918 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 919 /* 920 * if no shift in this attempt & shifting & started shifting & can refine, 921 * then try lower shift 922 */ 923 sctx.shift_hi = sctx.shift_fraction; 924 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 925 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 926 sctx.newshift = PETSC_TRUE; 927 sctx.nshift++; 928 } 929 } while (sctx.newshift); 930 931 /* invert diagonal entries for simplier triangular solves */ 932 for (i=0; i<n; i++) { 933 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 934 } 935 936 ierr = PetscFree(rtmp);CHKERRQ(ierr); 937 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 938 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 939 940 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 941 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 942 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 943 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 944 945 A->assembled = PETSC_TRUE; 946 A->preallocated = PETSC_TRUE; 947 948 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 949 if (sctx.nshift) { 950 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 951 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 952 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 953 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 954 } 955 } 956 PetscFunctionReturn(0); 957 } 958 959 /* ----------------------------------------------------------- */ 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatLUFactor_SeqAIJ" 962 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 963 { 964 PetscErrorCode ierr; 965 Mat C; 966 967 PetscFunctionBegin; 968 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 969 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 970 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 971 972 A->ops->solve = C->ops->solve; 973 A->ops->solvetranspose = C->ops->solvetranspose; 974 975 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 976 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 /* ----------------------------------------------------------- */ 980 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 984 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 985 { 986 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 987 IS iscol = a->col,isrow = a->row; 988 PetscErrorCode ierr; 989 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 990 PetscInt nz; 991 const PetscInt *rout,*cout,*r,*c; 992 PetscScalar *x,*tmp,*tmps,sum; 993 const PetscScalar *b; 994 const MatScalar *aa = a->a,*v; 995 996 PetscFunctionBegin; 997 if (!n) PetscFunctionReturn(0); 998 999 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1000 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1001 tmp = a->solve_work; 1002 1003 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1004 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1005 1006 /* forward solve the lower triangular */ 1007 tmp[0] = b[*r++]; 1008 tmps = tmp; 1009 for (i=1; i<n; i++) { 1010 v = aa + ai[i]; 1011 vi = aj + ai[i]; 1012 nz = a->diag[i] - ai[i]; 1013 sum = b[*r++]; 1014 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1015 tmp[i] = sum; 1016 } 1017 1018 /* backward solve the upper triangular */ 1019 for (i=n-1; i>=0; i--) { 1020 v = aa + a->diag[i] + 1; 1021 vi = aj + a->diag[i] + 1; 1022 nz = ai[i+1] - a->diag[i] - 1; 1023 sum = tmp[i]; 1024 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1025 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1026 } 1027 1028 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1029 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1030 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1031 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1032 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1038 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1039 { 1040 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1041 IS iscol = a->col,isrow = a->row; 1042 PetscErrorCode ierr; 1043 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1044 PetscInt nz,neq; 1045 const PetscInt *rout,*cout,*r,*c; 1046 PetscScalar *x,*b,*tmp,*tmps,sum; 1047 const MatScalar *aa = a->a,*v; 1048 PetscBool bisdense,xisdense; 1049 1050 PetscFunctionBegin; 1051 if (!n) PetscFunctionReturn(0); 1052 1053 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1054 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1055 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1056 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1057 1058 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1059 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1060 1061 tmp = a->solve_work; 1062 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1063 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1064 1065 for (neq=0; neq<B->cmap->n; neq++) { 1066 /* forward solve the lower triangular */ 1067 tmp[0] = b[r[0]]; 1068 tmps = tmp; 1069 for (i=1; i<n; i++) { 1070 v = aa + ai[i]; 1071 vi = aj + ai[i]; 1072 nz = a->diag[i] - ai[i]; 1073 sum = b[r[i]]; 1074 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1075 tmp[i] = sum; 1076 } 1077 /* backward solve the upper triangular */ 1078 for (i=n-1; i>=0; i--) { 1079 v = aa + a->diag[i] + 1; 1080 vi = aj + a->diag[i] + 1; 1081 nz = ai[i+1] - a->diag[i] - 1; 1082 sum = tmp[i]; 1083 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1084 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1085 } 1086 1087 b += n; 1088 x += n; 1089 } 1090 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1091 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1092 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1093 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1094 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1100 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1101 { 1102 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1103 IS iscol = a->col,isrow = a->row; 1104 PetscErrorCode ierr; 1105 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1106 PetscInt nz,neq; 1107 const PetscInt *rout,*cout,*r,*c; 1108 PetscScalar *x,*b,*tmp,sum; 1109 const MatScalar *aa = a->a,*v; 1110 PetscBool bisdense,xisdense; 1111 1112 PetscFunctionBegin; 1113 if (!n) PetscFunctionReturn(0); 1114 1115 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1116 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1117 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1118 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1119 1120 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1121 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1122 1123 tmp = a->solve_work; 1124 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1125 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1126 1127 for (neq=0; neq<B->cmap->n; neq++) { 1128 /* forward solve the lower triangular */ 1129 tmp[0] = b[r[0]]; 1130 v = aa; 1131 vi = aj; 1132 for (i=1; i<n; i++) { 1133 nz = ai[i+1] - ai[i]; 1134 sum = b[r[i]]; 1135 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1136 tmp[i] = sum; 1137 v += nz; vi += nz; 1138 } 1139 1140 /* backward solve the upper triangular */ 1141 for (i=n-1; i>=0; i--) { 1142 v = aa + adiag[i+1]+1; 1143 vi = aj + adiag[i+1]+1; 1144 nz = adiag[i]-adiag[i+1]-1; 1145 sum = tmp[i]; 1146 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1147 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1148 } 1149 1150 b += n; 1151 x += n; 1152 } 1153 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1154 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1155 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1156 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1157 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1163 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1164 { 1165 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1166 IS iscol = a->col,isrow = a->row; 1167 PetscErrorCode ierr; 1168 const PetscInt *r,*c,*rout,*cout; 1169 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1170 PetscInt nz,row; 1171 PetscScalar *x,*b,*tmp,*tmps,sum; 1172 const MatScalar *aa = a->a,*v; 1173 1174 PetscFunctionBegin; 1175 if (!n) PetscFunctionReturn(0); 1176 1177 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1178 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1179 tmp = a->solve_work; 1180 1181 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1182 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1183 1184 /* forward solve the lower triangular */ 1185 tmp[0] = b[*r++]; 1186 tmps = tmp; 1187 for (row=1; row<n; row++) { 1188 i = rout[row]; /* permuted row */ 1189 v = aa + ai[i]; 1190 vi = aj + ai[i]; 1191 nz = a->diag[i] - ai[i]; 1192 sum = b[*r++]; 1193 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1194 tmp[row] = sum; 1195 } 1196 1197 /* backward solve the upper triangular */ 1198 for (row=n-1; row>=0; row--) { 1199 i = rout[row]; /* permuted row */ 1200 v = aa + a->diag[i] + 1; 1201 vi = aj + a->diag[i] + 1; 1202 nz = ai[i+1] - a->diag[i] - 1; 1203 sum = tmp[row]; 1204 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1205 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1206 } 1207 1208 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1209 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1210 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1211 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1212 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* ----------------------------------------------------------- */ 1217 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1220 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1221 { 1222 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 PetscInt n = A->rmap->n; 1225 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1226 PetscScalar *x; 1227 const PetscScalar *b; 1228 const MatScalar *aa = a->a; 1229 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1230 PetscInt adiag_i,i,nz,ai_i; 1231 const PetscInt *vi; 1232 const MatScalar *v; 1233 PetscScalar sum; 1234 #endif 1235 1236 PetscFunctionBegin; 1237 if (!n) PetscFunctionReturn(0); 1238 1239 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1240 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1241 1242 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1243 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1244 #else 1245 /* forward solve the lower triangular */ 1246 x[0] = b[0]; 1247 for (i=1; i<n; i++) { 1248 ai_i = ai[i]; 1249 v = aa + ai_i; 1250 vi = aj + ai_i; 1251 nz = adiag[i] - ai_i; 1252 sum = b[i]; 1253 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1254 x[i] = sum; 1255 } 1256 1257 /* backward solve the upper triangular */ 1258 for (i=n-1; i>=0; i--) { 1259 adiag_i = adiag[i]; 1260 v = aa + adiag_i + 1; 1261 vi = aj + adiag_i + 1; 1262 nz = ai[i+1] - adiag_i - 1; 1263 sum = x[i]; 1264 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1265 x[i] = sum*aa[adiag_i]; 1266 } 1267 #endif 1268 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1269 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1270 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #undef __FUNCT__ 1275 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1276 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1277 { 1278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1279 IS iscol = a->col,isrow = a->row; 1280 PetscErrorCode ierr; 1281 PetscInt i, n = A->rmap->n,j; 1282 PetscInt nz; 1283 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1284 PetscScalar *x,*tmp,sum; 1285 const PetscScalar *b; 1286 const MatScalar *aa = a->a,*v; 1287 1288 PetscFunctionBegin; 1289 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1290 1291 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1292 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1293 tmp = a->solve_work; 1294 1295 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1296 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1297 1298 /* forward solve the lower triangular */ 1299 tmp[0] = b[*r++]; 1300 for (i=1; i<n; i++) { 1301 v = aa + ai[i]; 1302 vi = aj + ai[i]; 1303 nz = a->diag[i] - ai[i]; 1304 sum = b[*r++]; 1305 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1306 tmp[i] = sum; 1307 } 1308 1309 /* backward solve the upper triangular */ 1310 for (i=n-1; i>=0; i--) { 1311 v = aa + a->diag[i] + 1; 1312 vi = aj + a->diag[i] + 1; 1313 nz = ai[i+1] - a->diag[i] - 1; 1314 sum = tmp[i]; 1315 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1316 tmp[i] = sum*aa[a->diag[i]]; 1317 x[*c--] += tmp[i]; 1318 } 1319 1320 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1321 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1322 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1324 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1330 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1331 { 1332 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1333 IS iscol = a->col,isrow = a->row; 1334 PetscErrorCode ierr; 1335 PetscInt i, n = A->rmap->n,j; 1336 PetscInt nz; 1337 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1338 PetscScalar *x,*tmp,sum; 1339 const PetscScalar *b; 1340 const MatScalar *aa = a->a,*v; 1341 1342 PetscFunctionBegin; 1343 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1344 1345 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1346 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1347 tmp = a->solve_work; 1348 1349 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1350 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1351 1352 /* forward solve the lower triangular */ 1353 tmp[0] = b[r[0]]; 1354 v = aa; 1355 vi = aj; 1356 for (i=1; i<n; i++) { 1357 nz = ai[i+1] - ai[i]; 1358 sum = b[r[i]]; 1359 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1360 tmp[i] = sum; 1361 v += nz; 1362 vi += nz; 1363 } 1364 1365 /* backward solve the upper triangular */ 1366 v = aa + adiag[n-1]; 1367 vi = aj + adiag[n-1]; 1368 for (i=n-1; i>=0; i--) { 1369 nz = adiag[i] - adiag[i+1] - 1; 1370 sum = tmp[i]; 1371 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1372 tmp[i] = sum*v[nz]; 1373 x[c[i]] += tmp[i]; 1374 v += nz+1; vi += nz+1; 1375 } 1376 1377 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1378 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1379 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1381 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1387 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1388 { 1389 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1390 IS iscol = a->col,isrow = a->row; 1391 PetscErrorCode ierr; 1392 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1393 PetscInt i,n = A->rmap->n,j; 1394 PetscInt nz; 1395 PetscScalar *x,*tmp,s1; 1396 const MatScalar *aa = a->a,*v; 1397 const PetscScalar *b; 1398 1399 PetscFunctionBegin; 1400 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1401 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1402 tmp = a->solve_work; 1403 1404 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1405 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1406 1407 /* copy the b into temp work space according to permutation */ 1408 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1409 1410 /* forward solve the U^T */ 1411 for (i=0; i<n; i++) { 1412 v = aa + diag[i]; 1413 vi = aj + diag[i] + 1; 1414 nz = ai[i+1] - diag[i] - 1; 1415 s1 = tmp[i]; 1416 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1417 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1418 tmp[i] = s1; 1419 } 1420 1421 /* backward solve the L^T */ 1422 for (i=n-1; i>=0; i--) { 1423 v = aa + diag[i] - 1; 1424 vi = aj + diag[i] - 1; 1425 nz = diag[i] - ai[i]; 1426 s1 = tmp[i]; 1427 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1428 } 1429 1430 /* copy tmp into x according to permutation */ 1431 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1432 1433 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1434 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1435 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1436 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1437 1438 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1444 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1445 { 1446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1447 IS iscol = a->col,isrow = a->row; 1448 PetscErrorCode ierr; 1449 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1450 PetscInt i,n = A->rmap->n,j; 1451 PetscInt nz; 1452 PetscScalar *x,*tmp,s1; 1453 const MatScalar *aa = a->a,*v; 1454 const PetscScalar *b; 1455 1456 PetscFunctionBegin; 1457 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1459 tmp = a->solve_work; 1460 1461 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1462 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1463 1464 /* copy the b into temp work space according to permutation */ 1465 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1466 1467 /* forward solve the U^T */ 1468 for (i=0; i<n; i++) { 1469 v = aa + adiag[i+1] + 1; 1470 vi = aj + adiag[i+1] + 1; 1471 nz = adiag[i] - adiag[i+1] - 1; 1472 s1 = tmp[i]; 1473 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1474 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1475 tmp[i] = s1; 1476 } 1477 1478 /* backward solve the L^T */ 1479 for (i=n-1; i>=0; i--) { 1480 v = aa + ai[i]; 1481 vi = aj + ai[i]; 1482 nz = ai[i+1] - ai[i]; 1483 s1 = tmp[i]; 1484 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1485 } 1486 1487 /* copy tmp into x according to permutation */ 1488 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1489 1490 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1491 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1492 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1493 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1494 1495 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1501 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1502 { 1503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1504 IS iscol = a->col,isrow = a->row; 1505 PetscErrorCode ierr; 1506 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1507 PetscInt i,n = A->rmap->n,j; 1508 PetscInt nz; 1509 PetscScalar *x,*tmp,s1; 1510 const MatScalar *aa = a->a,*v; 1511 const PetscScalar *b; 1512 1513 PetscFunctionBegin; 1514 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1515 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1516 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1517 tmp = a->solve_work; 1518 1519 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1520 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1521 1522 /* copy the b into temp work space according to permutation */ 1523 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1524 1525 /* forward solve the U^T */ 1526 for (i=0; i<n; i++) { 1527 v = aa + diag[i]; 1528 vi = aj + diag[i] + 1; 1529 nz = ai[i+1] - diag[i] - 1; 1530 s1 = tmp[i]; 1531 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1532 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1533 tmp[i] = s1; 1534 } 1535 1536 /* backward solve the L^T */ 1537 for (i=n-1; i>=0; i--) { 1538 v = aa + diag[i] - 1; 1539 vi = aj + diag[i] - 1; 1540 nz = diag[i] - ai[i]; 1541 s1 = tmp[i]; 1542 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1543 } 1544 1545 /* copy tmp into x according to permutation */ 1546 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1547 1548 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1549 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1550 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1551 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1552 1553 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1559 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1560 { 1561 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1562 IS iscol = a->col,isrow = a->row; 1563 PetscErrorCode ierr; 1564 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1565 PetscInt i,n = A->rmap->n,j; 1566 PetscInt nz; 1567 PetscScalar *x,*tmp,s1; 1568 const MatScalar *aa = a->a,*v; 1569 const PetscScalar *b; 1570 1571 PetscFunctionBegin; 1572 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1573 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1574 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1575 tmp = a->solve_work; 1576 1577 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1578 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1579 1580 /* copy the b into temp work space according to permutation */ 1581 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1582 1583 /* forward solve the U^T */ 1584 for (i=0; i<n; i++) { 1585 v = aa + adiag[i+1] + 1; 1586 vi = aj + adiag[i+1] + 1; 1587 nz = adiag[i] - adiag[i+1] - 1; 1588 s1 = tmp[i]; 1589 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1590 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1591 tmp[i] = s1; 1592 } 1593 1594 1595 /* backward solve the L^T */ 1596 for (i=n-1; i>=0; i--) { 1597 v = aa + ai[i]; 1598 vi = aj + ai[i]; 1599 nz = ai[i+1] - ai[i]; 1600 s1 = tmp[i]; 1601 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1602 } 1603 1604 /* copy tmp into x according to permutation */ 1605 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1606 1607 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1608 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1609 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1610 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1611 1612 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 /* ----------------------------------------------------------------*/ 1617 1618 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1619 1620 /* 1621 ilu() under revised new data structure. 1622 Factored arrays bj and ba are stored as 1623 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1624 1625 bi=fact->i is an array of size n+1, in which 1626 bi+ 1627 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1628 bi[n]: points to L(n-1,n-1)+1 1629 1630 bdiag=fact->diag is an array of size n+1,in which 1631 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1632 bdiag[n]: points to entry of U(n-1,0)-1 1633 1634 U(i,:) contains bdiag[i] as its last entry, i.e., 1635 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1636 */ 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1639 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1640 { 1641 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1642 PetscErrorCode ierr; 1643 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1644 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1645 IS isicol; 1646 1647 PetscFunctionBegin; 1648 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1649 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1650 b = (Mat_SeqAIJ*)(fact)->data; 1651 1652 /* allocate matrix arrays for new data structure */ 1653 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1654 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1655 1656 b->singlemalloc = PETSC_TRUE; 1657 if (!b->diag) { 1658 ierr = PetscMalloc1((n+1),&b->diag);CHKERRQ(ierr); 1659 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1660 } 1661 bdiag = b->diag; 1662 1663 if (n > 0) { 1664 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1665 } 1666 1667 /* set bi and bj with new data structure */ 1668 bi = b->i; 1669 bj = b->j; 1670 1671 /* L part */ 1672 bi[0] = 0; 1673 for (i=0; i<n; i++) { 1674 nz = adiag[i] - ai[i]; 1675 bi[i+1] = bi[i] + nz; 1676 aj = a->j + ai[i]; 1677 for (j=0; j<nz; j++) { 1678 /* *bj = aj[j]; bj++; */ 1679 bj[k++] = aj[j]; 1680 } 1681 } 1682 1683 /* U part */ 1684 bdiag[n] = bi[n]-1; 1685 for (i=n-1; i>=0; i--) { 1686 nz = ai[i+1] - adiag[i] - 1; 1687 aj = a->j + adiag[i] + 1; 1688 for (j=0; j<nz; j++) { 1689 /* *bj = aj[j]; bj++; */ 1690 bj[k++] = aj[j]; 1691 } 1692 /* diag[i] */ 1693 /* *bj = i; bj++; */ 1694 bj[k++] = i; 1695 bdiag[i] = bdiag[i+1] + nz + 1; 1696 } 1697 1698 fact->factortype = MAT_FACTOR_ILU; 1699 fact->info.factor_mallocs = 0; 1700 fact->info.fill_ratio_given = info->fill; 1701 fact->info.fill_ratio_needed = 1.0; 1702 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1703 ierr = Mat_CheckInode_FactorLU(fact);CHKERRQ(ierr); 1704 1705 b = (Mat_SeqAIJ*)(fact)->data; 1706 b->row = isrow; 1707 b->col = iscol; 1708 b->icol = isicol; 1709 ierr = PetscMalloc1((fact->rmap->n+1),&b->solve_work);CHKERRQ(ierr); 1710 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1711 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1717 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1718 { 1719 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1720 IS isicol; 1721 PetscErrorCode ierr; 1722 const PetscInt *r,*ic; 1723 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1724 PetscInt *bi,*cols,nnz,*cols_lvl; 1725 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1726 PetscInt i,levels,diagonal_fill; 1727 PetscBool col_identity,row_identity,missing; 1728 PetscReal f; 1729 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1730 PetscBT lnkbt; 1731 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1732 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1733 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1734 1735 PetscFunctionBegin; 1736 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1737 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1738 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1739 1740 levels = (PetscInt)info->levels; 1741 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1742 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1743 if (!levels && row_identity && col_identity) { 1744 /* special case: ilu(0) with natural ordering */ 1745 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1746 if (a->inode.size) { 1747 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1753 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1754 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1755 1756 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1757 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 1758 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 1759 bi[0] = bdiag[0] = 0; 1760 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1761 1762 /* create a linked list for storing column indices of the active row */ 1763 nlnk = n + 1; 1764 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1765 1766 /* initial FreeSpace size is f*(ai[n]+1) */ 1767 f = info->fill; 1768 diagonal_fill = (PetscInt)info->diagonal_fill; 1769 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1770 current_space = free_space; 1771 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1772 current_space_lvl = free_space_lvl; 1773 for (i=0; i<n; i++) { 1774 nzi = 0; 1775 /* copy current row into linked list */ 1776 nnz = ai[r[i]+1] - ai[r[i]]; 1777 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1778 cols = aj + ai[r[i]]; 1779 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1780 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1781 nzi += nlnk; 1782 1783 /* make sure diagonal entry is included */ 1784 if (diagonal_fill && lnk[i] == -1) { 1785 fm = n; 1786 while (lnk[fm] < i) fm = lnk[fm]; 1787 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1788 lnk[fm] = i; 1789 lnk_lvl[i] = 0; 1790 nzi++; dcount++; 1791 } 1792 1793 /* add pivot rows into the active row */ 1794 nzbd = 0; 1795 prow = lnk[n]; 1796 while (prow < i) { 1797 nnz = bdiag[prow]; 1798 cols = bj_ptr[prow] + nnz + 1; 1799 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1800 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1801 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1802 nzi += nlnk; 1803 prow = lnk[prow]; 1804 nzbd++; 1805 } 1806 bdiag[i] = nzbd; 1807 bi[i+1] = bi[i] + nzi; 1808 /* if free space is not available, make more free space */ 1809 if (current_space->local_remaining<nzi) { 1810 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1811 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1812 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1813 reallocs++; 1814 } 1815 1816 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1817 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1818 bj_ptr[i] = current_space->array; 1819 bjlvl_ptr[i] = current_space_lvl->array; 1820 1821 /* make sure the active row i has diagonal entry */ 1822 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1823 1824 current_space->array += nzi; 1825 current_space->local_used += nzi; 1826 current_space->local_remaining -= nzi; 1827 current_space_lvl->array += nzi; 1828 current_space_lvl->local_used += nzi; 1829 current_space_lvl->local_remaining -= nzi; 1830 } 1831 1832 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1833 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1834 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1835 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 1836 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1837 1838 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1839 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1840 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1841 1842 #if defined(PETSC_USE_INFO) 1843 { 1844 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1845 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1846 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1847 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1848 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1849 if (diagonal_fill) { 1850 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1851 } 1852 } 1853 #endif 1854 /* put together the new matrix */ 1855 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1856 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1857 b = (Mat_SeqAIJ*)(fact)->data; 1858 1859 b->free_a = PETSC_TRUE; 1860 b->free_ij = PETSC_TRUE; 1861 b->singlemalloc = PETSC_FALSE; 1862 1863 ierr = PetscMalloc1((bdiag[0]+1),&b->a);CHKERRQ(ierr); 1864 1865 b->j = bj; 1866 b->i = bi; 1867 b->diag = bdiag; 1868 b->ilen = 0; 1869 b->imax = 0; 1870 b->row = isrow; 1871 b->col = iscol; 1872 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1873 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1874 b->icol = isicol; 1875 1876 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 1877 /* In b structure: Free imax, ilen, old a, old j. 1878 Allocate bdiag, solve_work, new a, new j */ 1879 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1880 b->maxnz = b->nz = bdiag[0]+1; 1881 1882 (fact)->info.factor_mallocs = reallocs; 1883 (fact)->info.fill_ratio_given = f; 1884 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1885 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1886 if (a->inode.size) { 1887 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1888 } 1889 ierr = Mat_CheckInode_FactorLU(fact);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1895 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1896 { 1897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1898 IS isicol; 1899 PetscErrorCode ierr; 1900 const PetscInt *r,*ic; 1901 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1902 PetscInt *bi,*cols,nnz,*cols_lvl; 1903 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1904 PetscInt i,levels,diagonal_fill; 1905 PetscBool col_identity,row_identity; 1906 PetscReal f; 1907 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1908 PetscBT lnkbt; 1909 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1910 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1911 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1912 PetscBool missing; 1913 1914 PetscFunctionBegin; 1915 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1916 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1917 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1918 1919 f = info->fill; 1920 levels = (PetscInt)info->levels; 1921 diagonal_fill = (PetscInt)info->diagonal_fill; 1922 1923 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1924 1925 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1926 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1927 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1928 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1929 1930 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1931 if (a->inode.size) { 1932 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1933 } 1934 fact->factortype = MAT_FACTOR_ILU; 1935 (fact)->info.factor_mallocs = 0; 1936 (fact)->info.fill_ratio_given = info->fill; 1937 (fact)->info.fill_ratio_needed = 1.0; 1938 1939 b = (Mat_SeqAIJ*)(fact)->data; 1940 b->row = isrow; 1941 b->col = iscol; 1942 b->icol = isicol; 1943 ierr = PetscMalloc1(((fact)->rmap->n+1),&b->solve_work);CHKERRQ(ierr); 1944 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1945 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1946 PetscFunctionReturn(0); 1947 } 1948 1949 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1950 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1951 1952 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1953 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 1954 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 1955 bi[0] = bdiag[0] = 0; 1956 1957 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1958 1959 /* create a linked list for storing column indices of the active row */ 1960 nlnk = n + 1; 1961 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1962 1963 /* initial FreeSpace size is f*(ai[n]+1) */ 1964 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1965 current_space = free_space; 1966 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1967 current_space_lvl = free_space_lvl; 1968 1969 for (i=0; i<n; i++) { 1970 nzi = 0; 1971 /* copy current row into linked list */ 1972 nnz = ai[r[i]+1] - ai[r[i]]; 1973 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1974 cols = aj + ai[r[i]]; 1975 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1976 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1977 nzi += nlnk; 1978 1979 /* make sure diagonal entry is included */ 1980 if (diagonal_fill && lnk[i] == -1) { 1981 fm = n; 1982 while (lnk[fm] < i) fm = lnk[fm]; 1983 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1984 lnk[fm] = i; 1985 lnk_lvl[i] = 0; 1986 nzi++; dcount++; 1987 } 1988 1989 /* add pivot rows into the active row */ 1990 nzbd = 0; 1991 prow = lnk[n]; 1992 while (prow < i) { 1993 nnz = bdiag[prow]; 1994 cols = bj_ptr[prow] + nnz + 1; 1995 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1996 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1997 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1998 nzi += nlnk; 1999 prow = lnk[prow]; 2000 nzbd++; 2001 } 2002 bdiag[i] = nzbd; 2003 bi[i+1] = bi[i] + nzi; 2004 2005 /* if free space is not available, make more free space */ 2006 if (current_space->local_remaining<nzi) { 2007 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2008 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2009 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2010 reallocs++; 2011 } 2012 2013 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2014 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2015 bj_ptr[i] = current_space->array; 2016 bjlvl_ptr[i] = current_space_lvl->array; 2017 2018 /* make sure the active row i has diagonal entry */ 2019 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2020 2021 current_space->array += nzi; 2022 current_space->local_used += nzi; 2023 current_space->local_remaining -= nzi; 2024 current_space_lvl->array += nzi; 2025 current_space_lvl->local_used += nzi; 2026 current_space_lvl->local_remaining -= nzi; 2027 } 2028 2029 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2030 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2031 2032 /* destroy list of free space and other temporary arrays */ 2033 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 2034 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2035 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2036 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2037 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2038 2039 #if defined(PETSC_USE_INFO) 2040 { 2041 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2042 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2043 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2044 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2045 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2046 if (diagonal_fill) { 2047 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2048 } 2049 } 2050 #endif 2051 2052 /* put together the new matrix */ 2053 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2054 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2055 b = (Mat_SeqAIJ*)(fact)->data; 2056 2057 b->free_a = PETSC_TRUE; 2058 b->free_ij = PETSC_TRUE; 2059 b->singlemalloc = PETSC_FALSE; 2060 2061 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2062 b->j = bj; 2063 b->i = bi; 2064 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2065 b->diag = bdiag; 2066 b->ilen = 0; 2067 b->imax = 0; 2068 b->row = isrow; 2069 b->col = iscol; 2070 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2071 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2072 b->icol = isicol; 2073 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 2074 /* In b structure: Free imax, ilen, old a, old j. 2075 Allocate bdiag, solve_work, new a, new j */ 2076 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2077 b->maxnz = b->nz = bi[n]; 2078 2079 (fact)->info.factor_mallocs = reallocs; 2080 (fact)->info.fill_ratio_given = f; 2081 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2082 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2083 if (a->inode.size) { 2084 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2091 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2092 { 2093 Mat C = B; 2094 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2095 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2096 IS ip=b->row,iip = b->icol; 2097 PetscErrorCode ierr; 2098 const PetscInt *rip,*riip; 2099 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2100 PetscInt *ai=a->i,*aj=a->j; 2101 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2102 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2103 PetscBool perm_identity; 2104 FactorShiftCtx sctx; 2105 PetscReal rs; 2106 MatScalar d,*v; 2107 2108 PetscFunctionBegin; 2109 /* MatPivotSetUp(): initialize shift context sctx */ 2110 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2111 2112 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2113 sctx.shift_top = info->zeropivot; 2114 for (i=0; i<mbs; i++) { 2115 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2116 d = (aa)[a->diag[i]]; 2117 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2118 v = aa+ai[i]; 2119 nz = ai[i+1] - ai[i]; 2120 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2121 if (rs>sctx.shift_top) sctx.shift_top = rs; 2122 } 2123 sctx.shift_top *= 1.1; 2124 sctx.nshift_max = 5; 2125 sctx.shift_lo = 0.; 2126 sctx.shift_hi = 1.; 2127 } 2128 2129 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2130 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2131 2132 /* allocate working arrays 2133 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2134 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2135 */ 2136 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2137 2138 do { 2139 sctx.newshift = PETSC_FALSE; 2140 2141 for (i=0; i<mbs; i++) c2r[i] = mbs; 2142 if (mbs) il[0] = 0; 2143 2144 for (k = 0; k<mbs; k++) { 2145 /* zero rtmp */ 2146 nz = bi[k+1] - bi[k]; 2147 bjtmp = bj + bi[k]; 2148 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2149 2150 /* load in initial unfactored row */ 2151 bval = ba + bi[k]; 2152 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2153 for (j = jmin; j < jmax; j++) { 2154 col = riip[aj[j]]; 2155 if (col >= k) { /* only take upper triangular entry */ 2156 rtmp[col] = aa[j]; 2157 *bval++ = 0.0; /* for in-place factorization */ 2158 } 2159 } 2160 /* shift the diagonal of the matrix: ZeropivotApply() */ 2161 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2162 2163 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2164 dk = rtmp[k]; 2165 i = c2r[k]; /* first row to be added to k_th row */ 2166 2167 while (i < k) { 2168 nexti = c2r[i]; /* next row to be added to k_th row */ 2169 2170 /* compute multiplier, update diag(k) and U(i,k) */ 2171 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2172 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2173 dk += uikdi*ba[ili]; /* update diag[k] */ 2174 ba[ili] = uikdi; /* -U(i,k) */ 2175 2176 /* add multiple of row i to k-th row */ 2177 jmin = ili + 1; jmax = bi[i+1]; 2178 if (jmin < jmax) { 2179 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2180 /* update il and c2r for row i */ 2181 il[i] = jmin; 2182 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2183 } 2184 i = nexti; 2185 } 2186 2187 /* copy data into U(k,:) */ 2188 rs = 0.0; 2189 jmin = bi[k]; jmax = bi[k+1]-1; 2190 if (jmin < jmax) { 2191 for (j=jmin; j<jmax; j++) { 2192 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2193 } 2194 /* add the k-th row into il and c2r */ 2195 il[k] = jmin; 2196 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2197 } 2198 2199 /* MatPivotCheck() */ 2200 sctx.rs = rs; 2201 sctx.pv = dk; 2202 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2203 if (sctx.newshift) break; 2204 dk = sctx.pv; 2205 2206 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2207 } 2208 } while (sctx.newshift); 2209 2210 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2211 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2212 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2213 2214 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2215 if (perm_identity) { 2216 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2217 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2218 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2219 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2220 } else { 2221 B->ops->solve = MatSolve_SeqSBAIJ_1; 2222 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2223 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2224 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2225 } 2226 2227 C->assembled = PETSC_TRUE; 2228 C->preallocated = PETSC_TRUE; 2229 2230 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2231 2232 /* MatPivotView() */ 2233 if (sctx.nshift) { 2234 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2235 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2236 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2237 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2238 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2239 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2240 } 2241 } 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2247 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2248 { 2249 Mat C = B; 2250 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2251 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2252 IS ip=b->row,iip = b->icol; 2253 PetscErrorCode ierr; 2254 const PetscInt *rip,*riip; 2255 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2256 PetscInt *ai=a->i,*aj=a->j; 2257 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2258 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2259 PetscBool perm_identity; 2260 FactorShiftCtx sctx; 2261 PetscReal rs; 2262 MatScalar d,*v; 2263 2264 PetscFunctionBegin; 2265 /* MatPivotSetUp(): initialize shift context sctx */ 2266 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2267 2268 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2269 sctx.shift_top = info->zeropivot; 2270 for (i=0; i<mbs; i++) { 2271 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2272 d = (aa)[a->diag[i]]; 2273 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2274 v = aa+ai[i]; 2275 nz = ai[i+1] - ai[i]; 2276 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2277 if (rs>sctx.shift_top) sctx.shift_top = rs; 2278 } 2279 sctx.shift_top *= 1.1; 2280 sctx.nshift_max = 5; 2281 sctx.shift_lo = 0.; 2282 sctx.shift_hi = 1.; 2283 } 2284 2285 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2286 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2287 2288 /* initialization */ 2289 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2290 2291 do { 2292 sctx.newshift = PETSC_FALSE; 2293 2294 for (i=0; i<mbs; i++) jl[i] = mbs; 2295 il[0] = 0; 2296 2297 for (k = 0; k<mbs; k++) { 2298 /* zero rtmp */ 2299 nz = bi[k+1] - bi[k]; 2300 bjtmp = bj + bi[k]; 2301 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2302 2303 bval = ba + bi[k]; 2304 /* initialize k-th row by the perm[k]-th row of A */ 2305 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2306 for (j = jmin; j < jmax; j++) { 2307 col = riip[aj[j]]; 2308 if (col >= k) { /* only take upper triangular entry */ 2309 rtmp[col] = aa[j]; 2310 *bval++ = 0.0; /* for in-place factorization */ 2311 } 2312 } 2313 /* shift the diagonal of the matrix */ 2314 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2315 2316 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2317 dk = rtmp[k]; 2318 i = jl[k]; /* first row to be added to k_th row */ 2319 2320 while (i < k) { 2321 nexti = jl[i]; /* next row to be added to k_th row */ 2322 2323 /* compute multiplier, update diag(k) and U(i,k) */ 2324 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2325 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2326 dk += uikdi*ba[ili]; 2327 ba[ili] = uikdi; /* -U(i,k) */ 2328 2329 /* add multiple of row i to k-th row */ 2330 jmin = ili + 1; jmax = bi[i+1]; 2331 if (jmin < jmax) { 2332 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2333 /* update il and jl for row i */ 2334 il[i] = jmin; 2335 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2336 } 2337 i = nexti; 2338 } 2339 2340 /* shift the diagonals when zero pivot is detected */ 2341 /* compute rs=sum of abs(off-diagonal) */ 2342 rs = 0.0; 2343 jmin = bi[k]+1; 2344 nz = bi[k+1] - jmin; 2345 bcol = bj + jmin; 2346 for (j=0; j<nz; j++) { 2347 rs += PetscAbsScalar(rtmp[bcol[j]]); 2348 } 2349 2350 sctx.rs = rs; 2351 sctx.pv = dk; 2352 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2353 if (sctx.newshift) break; 2354 dk = sctx.pv; 2355 2356 /* copy data into U(k,:) */ 2357 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2358 jmin = bi[k]+1; jmax = bi[k+1]; 2359 if (jmin < jmax) { 2360 for (j=jmin; j<jmax; j++) { 2361 col = bj[j]; ba[j] = rtmp[col]; 2362 } 2363 /* add the k-th row into il and jl */ 2364 il[k] = jmin; 2365 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2366 } 2367 } 2368 } while (sctx.newshift); 2369 2370 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2371 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2372 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2373 2374 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2375 if (perm_identity) { 2376 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2377 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2378 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2379 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2380 } else { 2381 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2382 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2383 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2384 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2385 } 2386 2387 C->assembled = PETSC_TRUE; 2388 C->preallocated = PETSC_TRUE; 2389 2390 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2391 if (sctx.nshift) { 2392 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2393 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2394 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2395 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2396 } 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 /* 2402 icc() under revised new data structure. 2403 Factored arrays bj and ba are stored as 2404 U(0,:),...,U(i,:),U(n-1,:) 2405 2406 ui=fact->i is an array of size n+1, in which 2407 ui+ 2408 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2409 ui[n]: points to U(n-1,n-1)+1 2410 2411 udiag=fact->diag is an array of size n,in which 2412 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2413 2414 U(i,:) contains udiag[i] as its last entry, i.e., 2415 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2416 */ 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2420 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2421 { 2422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2423 Mat_SeqSBAIJ *b; 2424 PetscErrorCode ierr; 2425 PetscBool perm_identity,missing; 2426 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2427 const PetscInt *rip,*riip; 2428 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2429 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2430 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2431 PetscReal fill =info->fill,levels=info->levels; 2432 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2433 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2434 PetscBT lnkbt; 2435 IS iperm; 2436 2437 PetscFunctionBegin; 2438 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2439 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2440 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2441 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2442 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2443 2444 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2445 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2446 ui[0] = 0; 2447 2448 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2449 if (!levels && perm_identity) { 2450 for (i=0; i<am; i++) { 2451 ncols = ai[i+1] - a->diag[i]; 2452 ui[i+1] = ui[i] + ncols; 2453 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2454 } 2455 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2456 cols = uj; 2457 for (i=0; i<am; i++) { 2458 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2459 ncols = ai[i+1] - a->diag[i] -1; 2460 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2461 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2462 } 2463 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2464 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2465 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2466 2467 /* initialization */ 2468 ierr = PetscMalloc1((am+1),&ajtmp);CHKERRQ(ierr); 2469 2470 /* jl: linked list for storing indices of the pivot rows 2471 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2472 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2473 for (i=0; i<am; i++) { 2474 jl[i] = am; il[i] = 0; 2475 } 2476 2477 /* create and initialize a linked list for storing column indices of the active row k */ 2478 nlnk = am + 1; 2479 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2480 2481 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2482 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2483 current_space = free_space; 2484 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2485 current_space_lvl = free_space_lvl; 2486 2487 for (k=0; k<am; k++) { /* for each active row k */ 2488 /* initialize lnk by the column indices of row rip[k] of A */ 2489 nzk = 0; 2490 ncols = ai[rip[k]+1] - ai[rip[k]]; 2491 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2492 ncols_upper = 0; 2493 for (j=0; j<ncols; j++) { 2494 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2495 if (riip[i] >= k) { /* only take upper triangular entry */ 2496 ajtmp[ncols_upper] = i; 2497 ncols_upper++; 2498 } 2499 } 2500 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2501 nzk += nlnk; 2502 2503 /* update lnk by computing fill-in for each pivot row to be merged in */ 2504 prow = jl[k]; /* 1st pivot row */ 2505 2506 while (prow < k) { 2507 nextprow = jl[prow]; 2508 2509 /* merge prow into k-th row */ 2510 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2511 jmax = ui[prow+1]; 2512 ncols = jmax-jmin; 2513 i = jmin - ui[prow]; 2514 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2515 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2516 j = *(uj - 1); 2517 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2518 nzk += nlnk; 2519 2520 /* update il and jl for prow */ 2521 if (jmin < jmax) { 2522 il[prow] = jmin; 2523 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2524 } 2525 prow = nextprow; 2526 } 2527 2528 /* if free space is not available, make more free space */ 2529 if (current_space->local_remaining<nzk) { 2530 i = am - k + 1; /* num of unfactored rows */ 2531 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2532 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2533 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2534 reallocs++; 2535 } 2536 2537 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2538 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2539 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2540 2541 /* add the k-th row into il and jl */ 2542 if (nzk > 1) { 2543 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2544 jl[k] = jl[i]; jl[i] = k; 2545 il[k] = ui[k] + 1; 2546 } 2547 uj_ptr[k] = current_space->array; 2548 uj_lvl_ptr[k] = current_space_lvl->array; 2549 2550 current_space->array += nzk; 2551 current_space->local_used += nzk; 2552 current_space->local_remaining -= nzk; 2553 2554 current_space_lvl->array += nzk; 2555 current_space_lvl->local_used += nzk; 2556 current_space_lvl->local_remaining -= nzk; 2557 2558 ui[k+1] = ui[k] + nzk; 2559 } 2560 2561 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2562 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2563 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2564 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2565 2566 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2567 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2568 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2569 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2570 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2571 2572 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2573 2574 /* put together the new matrix in MATSEQSBAIJ format */ 2575 b = (Mat_SeqSBAIJ*)(fact)->data; 2576 b->singlemalloc = PETSC_FALSE; 2577 2578 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2579 2580 b->j = uj; 2581 b->i = ui; 2582 b->diag = udiag; 2583 b->free_diag = PETSC_TRUE; 2584 b->ilen = 0; 2585 b->imax = 0; 2586 b->row = perm; 2587 b->col = perm; 2588 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2589 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2590 b->icol = iperm; 2591 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2592 2593 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2594 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2595 2596 b->maxnz = b->nz = ui[am]; 2597 b->free_a = PETSC_TRUE; 2598 b->free_ij = PETSC_TRUE; 2599 2600 fact->info.factor_mallocs = reallocs; 2601 fact->info.fill_ratio_given = fill; 2602 if (ai[am] != 0) { 2603 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2604 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2605 } else { 2606 fact->info.fill_ratio_needed = 0.0; 2607 } 2608 #if defined(PETSC_USE_INFO) 2609 if (ai[am] != 0) { 2610 PetscReal af = fact->info.fill_ratio_needed; 2611 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2612 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2613 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2614 } else { 2615 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2616 } 2617 #endif 2618 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2619 PetscFunctionReturn(0); 2620 } 2621 2622 #undef __FUNCT__ 2623 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2624 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2625 { 2626 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2627 Mat_SeqSBAIJ *b; 2628 PetscErrorCode ierr; 2629 PetscBool perm_identity,missing; 2630 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2631 const PetscInt *rip,*riip; 2632 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2633 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2634 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2635 PetscReal fill =info->fill,levels=info->levels; 2636 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2637 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2638 PetscBT lnkbt; 2639 IS iperm; 2640 2641 PetscFunctionBegin; 2642 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2643 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2644 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2645 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2646 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2647 2648 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2649 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2650 ui[0] = 0; 2651 2652 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2653 if (!levels && perm_identity) { 2654 2655 for (i=0; i<am; i++) { 2656 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2657 udiag[i] = ui[i]; 2658 } 2659 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2660 cols = uj; 2661 for (i=0; i<am; i++) { 2662 aj = a->j + a->diag[i]; 2663 ncols = ui[i+1] - ui[i]; 2664 for (j=0; j<ncols; j++) *cols++ = *aj++; 2665 } 2666 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2667 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2668 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2669 2670 /* initialization */ 2671 ierr = PetscMalloc1((am+1),&ajtmp);CHKERRQ(ierr); 2672 2673 /* jl: linked list for storing indices of the pivot rows 2674 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2675 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2676 for (i=0; i<am; i++) { 2677 jl[i] = am; il[i] = 0; 2678 } 2679 2680 /* create and initialize a linked list for storing column indices of the active row k */ 2681 nlnk = am + 1; 2682 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2683 2684 /* initial FreeSpace size is fill*(ai[am]+1) */ 2685 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2686 current_space = free_space; 2687 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2688 current_space_lvl = free_space_lvl; 2689 2690 for (k=0; k<am; k++) { /* for each active row k */ 2691 /* initialize lnk by the column indices of row rip[k] of A */ 2692 nzk = 0; 2693 ncols = ai[rip[k]+1] - ai[rip[k]]; 2694 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2695 ncols_upper = 0; 2696 for (j=0; j<ncols; j++) { 2697 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2698 if (riip[i] >= k) { /* only take upper triangular entry */ 2699 ajtmp[ncols_upper] = i; 2700 ncols_upper++; 2701 } 2702 } 2703 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2704 nzk += nlnk; 2705 2706 /* update lnk by computing fill-in for each pivot row to be merged in */ 2707 prow = jl[k]; /* 1st pivot row */ 2708 2709 while (prow < k) { 2710 nextprow = jl[prow]; 2711 2712 /* merge prow into k-th row */ 2713 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2714 jmax = ui[prow+1]; 2715 ncols = jmax-jmin; 2716 i = jmin - ui[prow]; 2717 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2718 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2719 j = *(uj - 1); 2720 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2721 nzk += nlnk; 2722 2723 /* update il and jl for prow */ 2724 if (jmin < jmax) { 2725 il[prow] = jmin; 2726 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2727 } 2728 prow = nextprow; 2729 } 2730 2731 /* if free space is not available, make more free space */ 2732 if (current_space->local_remaining<nzk) { 2733 i = am - k + 1; /* num of unfactored rows */ 2734 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2735 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2736 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2737 reallocs++; 2738 } 2739 2740 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2741 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2742 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2743 2744 /* add the k-th row into il and jl */ 2745 if (nzk > 1) { 2746 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2747 jl[k] = jl[i]; jl[i] = k; 2748 il[k] = ui[k] + 1; 2749 } 2750 uj_ptr[k] = current_space->array; 2751 uj_lvl_ptr[k] = current_space_lvl->array; 2752 2753 current_space->array += nzk; 2754 current_space->local_used += nzk; 2755 current_space->local_remaining -= nzk; 2756 2757 current_space_lvl->array += nzk; 2758 current_space_lvl->local_used += nzk; 2759 current_space_lvl->local_remaining -= nzk; 2760 2761 ui[k+1] = ui[k] + nzk; 2762 } 2763 2764 #if defined(PETSC_USE_INFO) 2765 if (ai[am] != 0) { 2766 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2767 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2768 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2769 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2770 } else { 2771 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2772 } 2773 #endif 2774 2775 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2776 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2777 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2778 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2779 2780 /* destroy list of free space and other temporary array(s) */ 2781 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2782 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2783 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2784 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2785 2786 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2787 2788 /* put together the new matrix in MATSEQSBAIJ format */ 2789 2790 b = (Mat_SeqSBAIJ*)fact->data; 2791 b->singlemalloc = PETSC_FALSE; 2792 2793 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2794 2795 b->j = uj; 2796 b->i = ui; 2797 b->diag = udiag; 2798 b->free_diag = PETSC_TRUE; 2799 b->ilen = 0; 2800 b->imax = 0; 2801 b->row = perm; 2802 b->col = perm; 2803 2804 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2805 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2806 2807 b->icol = iperm; 2808 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2809 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2810 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2811 b->maxnz = b->nz = ui[am]; 2812 b->free_a = PETSC_TRUE; 2813 b->free_ij = PETSC_TRUE; 2814 2815 fact->info.factor_mallocs = reallocs; 2816 fact->info.fill_ratio_given = fill; 2817 if (ai[am] != 0) { 2818 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2819 } else { 2820 fact->info.fill_ratio_needed = 0.0; 2821 } 2822 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2823 PetscFunctionReturn(0); 2824 } 2825 2826 #undef __FUNCT__ 2827 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2828 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2829 { 2830 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2831 Mat_SeqSBAIJ *b; 2832 PetscErrorCode ierr; 2833 PetscBool perm_identity,missing; 2834 PetscReal fill = info->fill; 2835 const PetscInt *rip,*riip; 2836 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2837 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2838 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2839 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2840 PetscBT lnkbt; 2841 IS iperm; 2842 2843 PetscFunctionBegin; 2844 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2845 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2846 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2847 2848 /* check whether perm is the identity mapping */ 2849 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2850 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2851 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2852 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2853 2854 /* initialization */ 2855 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2856 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2857 ui[0] = 0; 2858 2859 /* jl: linked list for storing indices of the pivot rows 2860 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2861 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2862 for (i=0; i<am; i++) { 2863 jl[i] = am; il[i] = 0; 2864 } 2865 2866 /* create and initialize a linked list for storing column indices of the active row k */ 2867 nlnk = am + 1; 2868 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2869 2870 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2871 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2872 current_space = free_space; 2873 2874 for (k=0; k<am; k++) { /* for each active row k */ 2875 /* initialize lnk by the column indices of row rip[k] of A */ 2876 nzk = 0; 2877 ncols = ai[rip[k]+1] - ai[rip[k]]; 2878 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2879 ncols_upper = 0; 2880 for (j=0; j<ncols; j++) { 2881 i = riip[*(aj + ai[rip[k]] + j)]; 2882 if (i >= k) { /* only take upper triangular entry */ 2883 cols[ncols_upper] = i; 2884 ncols_upper++; 2885 } 2886 } 2887 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2888 nzk += nlnk; 2889 2890 /* update lnk by computing fill-in for each pivot row to be merged in */ 2891 prow = jl[k]; /* 1st pivot row */ 2892 2893 while (prow < k) { 2894 nextprow = jl[prow]; 2895 /* merge prow into k-th row */ 2896 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2897 jmax = ui[prow+1]; 2898 ncols = jmax-jmin; 2899 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2900 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2901 nzk += nlnk; 2902 2903 /* update il and jl for prow */ 2904 if (jmin < jmax) { 2905 il[prow] = jmin; 2906 j = *uj_ptr; 2907 jl[prow] = jl[j]; 2908 jl[j] = prow; 2909 } 2910 prow = nextprow; 2911 } 2912 2913 /* if free space is not available, make more free space */ 2914 if (current_space->local_remaining<nzk) { 2915 i = am - k + 1; /* num of unfactored rows */ 2916 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2917 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2918 reallocs++; 2919 } 2920 2921 /* copy data into free space, then initialize lnk */ 2922 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2923 2924 /* add the k-th row into il and jl */ 2925 if (nzk > 1) { 2926 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2927 jl[k] = jl[i]; jl[i] = k; 2928 il[k] = ui[k] + 1; 2929 } 2930 ui_ptr[k] = current_space->array; 2931 2932 current_space->array += nzk; 2933 current_space->local_used += nzk; 2934 current_space->local_remaining -= nzk; 2935 2936 ui[k+1] = ui[k] + nzk; 2937 } 2938 2939 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2940 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2941 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2942 2943 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2944 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2945 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2946 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2947 2948 /* put together the new matrix in MATSEQSBAIJ format */ 2949 2950 b = (Mat_SeqSBAIJ*)fact->data; 2951 b->singlemalloc = PETSC_FALSE; 2952 b->free_a = PETSC_TRUE; 2953 b->free_ij = PETSC_TRUE; 2954 2955 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2956 2957 b->j = uj; 2958 b->i = ui; 2959 b->diag = udiag; 2960 b->free_diag = PETSC_TRUE; 2961 b->ilen = 0; 2962 b->imax = 0; 2963 b->row = perm; 2964 b->col = perm; 2965 2966 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2967 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2968 2969 b->icol = iperm; 2970 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2971 2972 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2973 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2974 2975 b->maxnz = b->nz = ui[am]; 2976 2977 fact->info.factor_mallocs = reallocs; 2978 fact->info.fill_ratio_given = fill; 2979 if (ai[am] != 0) { 2980 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2981 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2982 } else { 2983 fact->info.fill_ratio_needed = 0.0; 2984 } 2985 #if defined(PETSC_USE_INFO) 2986 if (ai[am] != 0) { 2987 PetscReal af = fact->info.fill_ratio_needed; 2988 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2989 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2990 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2991 } else { 2992 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2993 } 2994 #endif 2995 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 3001 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 3002 { 3003 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3004 Mat_SeqSBAIJ *b; 3005 PetscErrorCode ierr; 3006 PetscBool perm_identity,missing; 3007 PetscReal fill = info->fill; 3008 const PetscInt *rip,*riip; 3009 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3010 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3011 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3012 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3013 PetscBT lnkbt; 3014 IS iperm; 3015 3016 PetscFunctionBegin; 3017 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3018 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3019 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3020 3021 /* check whether perm is the identity mapping */ 3022 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3023 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3024 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3025 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3026 3027 /* initialization */ 3028 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 3029 ui[0] = 0; 3030 3031 /* jl: linked list for storing indices of the pivot rows 3032 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3033 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 3034 for (i=0; i<am; i++) { 3035 jl[i] = am; il[i] = 0; 3036 } 3037 3038 /* create and initialize a linked list for storing column indices of the active row k */ 3039 nlnk = am + 1; 3040 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3041 3042 /* initial FreeSpace size is fill*(ai[am]+1) */ 3043 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3044 current_space = free_space; 3045 3046 for (k=0; k<am; k++) { /* for each active row k */ 3047 /* initialize lnk by the column indices of row rip[k] of A */ 3048 nzk = 0; 3049 ncols = ai[rip[k]+1] - ai[rip[k]]; 3050 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3051 ncols_upper = 0; 3052 for (j=0; j<ncols; j++) { 3053 i = riip[*(aj + ai[rip[k]] + j)]; 3054 if (i >= k) { /* only take upper triangular entry */ 3055 cols[ncols_upper] = i; 3056 ncols_upper++; 3057 } 3058 } 3059 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3060 nzk += nlnk; 3061 3062 /* update lnk by computing fill-in for each pivot row to be merged in */ 3063 prow = jl[k]; /* 1st pivot row */ 3064 3065 while (prow < k) { 3066 nextprow = jl[prow]; 3067 /* merge prow into k-th row */ 3068 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3069 jmax = ui[prow+1]; 3070 ncols = jmax-jmin; 3071 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3072 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3073 nzk += nlnk; 3074 3075 /* update il and jl for prow */ 3076 if (jmin < jmax) { 3077 il[prow] = jmin; 3078 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3079 } 3080 prow = nextprow; 3081 } 3082 3083 /* if free space is not available, make more free space */ 3084 if (current_space->local_remaining<nzk) { 3085 i = am - k + 1; /* num of unfactored rows */ 3086 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3087 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3088 reallocs++; 3089 } 3090 3091 /* copy data into free space, then initialize lnk */ 3092 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3093 3094 /* add the k-th row into il and jl */ 3095 if (nzk-1 > 0) { 3096 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3097 jl[k] = jl[i]; jl[i] = k; 3098 il[k] = ui[k] + 1; 3099 } 3100 ui_ptr[k] = current_space->array; 3101 3102 current_space->array += nzk; 3103 current_space->local_used += nzk; 3104 current_space->local_remaining -= nzk; 3105 3106 ui[k+1] = ui[k] + nzk; 3107 } 3108 3109 #if defined(PETSC_USE_INFO) 3110 if (ai[am] != 0) { 3111 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3112 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3113 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3114 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3115 } else { 3116 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3117 } 3118 #endif 3119 3120 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3121 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3122 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3123 3124 /* destroy list of free space and other temporary array(s) */ 3125 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 3126 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3127 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3128 3129 /* put together the new matrix in MATSEQSBAIJ format */ 3130 3131 b = (Mat_SeqSBAIJ*)fact->data; 3132 b->singlemalloc = PETSC_FALSE; 3133 b->free_a = PETSC_TRUE; 3134 b->free_ij = PETSC_TRUE; 3135 3136 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 3137 3138 b->j = uj; 3139 b->i = ui; 3140 b->diag = 0; 3141 b->ilen = 0; 3142 b->imax = 0; 3143 b->row = perm; 3144 b->col = perm; 3145 3146 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3147 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3148 3149 b->icol = iperm; 3150 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3151 3152 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 3153 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3154 b->maxnz = b->nz = ui[am]; 3155 3156 fact->info.factor_mallocs = reallocs; 3157 fact->info.fill_ratio_given = fill; 3158 if (ai[am] != 0) { 3159 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3160 } else { 3161 fact->info.fill_ratio_needed = 0.0; 3162 } 3163 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3164 PetscFunctionReturn(0); 3165 } 3166 3167 #undef __FUNCT__ 3168 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3169 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3170 { 3171 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3172 PetscErrorCode ierr; 3173 PetscInt n = A->rmap->n; 3174 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3175 PetscScalar *x,sum; 3176 const PetscScalar *b; 3177 const MatScalar *aa = a->a,*v; 3178 PetscInt i,nz; 3179 3180 PetscFunctionBegin; 3181 if (!n) PetscFunctionReturn(0); 3182 3183 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3184 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3185 3186 /* forward solve the lower triangular */ 3187 x[0] = b[0]; 3188 v = aa; 3189 vi = aj; 3190 for (i=1; i<n; i++) { 3191 nz = ai[i+1] - ai[i]; 3192 sum = b[i]; 3193 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3194 v += nz; 3195 vi += nz; 3196 x[i] = sum; 3197 } 3198 3199 /* backward solve the upper triangular */ 3200 for (i=n-1; i>=0; i--) { 3201 v = aa + adiag[i+1] + 1; 3202 vi = aj + adiag[i+1] + 1; 3203 nz = adiag[i] - adiag[i+1]-1; 3204 sum = x[i]; 3205 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3206 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3207 } 3208 3209 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3210 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3211 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3212 PetscFunctionReturn(0); 3213 } 3214 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatSolve_SeqAIJ" 3217 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3218 { 3219 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3220 IS iscol = a->col,isrow = a->row; 3221 PetscErrorCode ierr; 3222 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3223 const PetscInt *rout,*cout,*r,*c; 3224 PetscScalar *x,*tmp,sum; 3225 const PetscScalar *b; 3226 const MatScalar *aa = a->a,*v; 3227 3228 PetscFunctionBegin; 3229 if (!n) PetscFunctionReturn(0); 3230 3231 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3232 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3233 tmp = a->solve_work; 3234 3235 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3236 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3237 3238 /* forward solve the lower triangular */ 3239 tmp[0] = b[r[0]]; 3240 v = aa; 3241 vi = aj; 3242 for (i=1; i<n; i++) { 3243 nz = ai[i+1] - ai[i]; 3244 sum = b[r[i]]; 3245 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3246 tmp[i] = sum; 3247 v += nz; vi += nz; 3248 } 3249 3250 /* backward solve the upper triangular */ 3251 for (i=n-1; i>=0; i--) { 3252 v = aa + adiag[i+1]+1; 3253 vi = aj + adiag[i+1]+1; 3254 nz = adiag[i]-adiag[i+1]-1; 3255 sum = tmp[i]; 3256 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3257 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3258 } 3259 3260 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3261 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3262 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3263 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3264 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3270 /* 3271 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3272 */ 3273 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3274 { 3275 Mat B = *fact; 3276 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3277 IS isicol; 3278 PetscErrorCode ierr; 3279 const PetscInt *r,*ic; 3280 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3281 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3282 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3283 PetscInt nlnk,*lnk; 3284 PetscBT lnkbt; 3285 PetscBool row_identity,icol_identity; 3286 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3287 const PetscInt *ics; 3288 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3289 PetscReal dt =info->dt,shift=info->shiftamount; 3290 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3291 PetscBool missing; 3292 3293 PetscFunctionBegin; 3294 if (dt == PETSC_DEFAULT) dt = 0.005; 3295 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3296 3297 /* ------- symbolic factorization, can be reused ---------*/ 3298 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3299 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3300 adiag=a->diag; 3301 3302 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3303 3304 /* bdiag is location of diagonal in factor */ 3305 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3306 ierr = PetscMalloc1((n+1),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3307 3308 /* allocate row pointers bi */ 3309 ierr = PetscMalloc1((2*n+2),&bi);CHKERRQ(ierr); 3310 3311 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3312 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3313 nnz_max = ai[n]+2*n*dtcount+2; 3314 3315 ierr = PetscMalloc1((nnz_max+1),&bj);CHKERRQ(ierr); 3316 ierr = PetscMalloc1((nnz_max+1),&ba);CHKERRQ(ierr); 3317 3318 /* put together the new matrix */ 3319 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3320 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3321 b = (Mat_SeqAIJ*)B->data; 3322 3323 b->free_a = PETSC_TRUE; 3324 b->free_ij = PETSC_TRUE; 3325 b->singlemalloc = PETSC_FALSE; 3326 3327 b->a = ba; 3328 b->j = bj; 3329 b->i = bi; 3330 b->diag = bdiag; 3331 b->ilen = 0; 3332 b->imax = 0; 3333 b->row = isrow; 3334 b->col = iscol; 3335 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3336 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3337 b->icol = isicol; 3338 3339 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 3340 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3341 b->maxnz = nnz_max; 3342 3343 B->factortype = MAT_FACTOR_ILUDT; 3344 B->info.factor_mallocs = 0; 3345 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3346 /* ------- end of symbolic factorization ---------*/ 3347 3348 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3349 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3350 ics = ic; 3351 3352 /* linked list for storing column indices of the active row */ 3353 nlnk = n + 1; 3354 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3355 3356 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3357 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3358 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3359 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3360 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3361 3362 bi[0] = 0; 3363 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3364 bdiag_rev[n] = bdiag[0]; 3365 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3366 for (i=0; i<n; i++) { 3367 /* copy initial fill into linked list */ 3368 nzi = ai[r[i]+1] - ai[r[i]]; 3369 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3370 nzi_al = adiag[r[i]] - ai[r[i]]; 3371 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3372 ajtmp = aj + ai[r[i]]; 3373 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3374 3375 /* load in initial (unfactored row) */ 3376 aatmp = a->a + ai[r[i]]; 3377 for (j=0; j<nzi; j++) { 3378 rtmp[ics[*ajtmp++]] = *aatmp++; 3379 } 3380 3381 /* add pivot rows into linked list */ 3382 row = lnk[n]; 3383 while (row < i) { 3384 nzi_bl = bi[row+1] - bi[row] + 1; 3385 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3386 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3387 nzi += nlnk; 3388 row = lnk[row]; 3389 } 3390 3391 /* copy data from lnk into jtmp, then initialize lnk */ 3392 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3393 3394 /* numerical factorization */ 3395 bjtmp = jtmp; 3396 row = *bjtmp++; /* 1st pivot row */ 3397 while (row < i) { 3398 pc = rtmp + row; 3399 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3400 multiplier = (*pc) * (*pv); 3401 *pc = multiplier; 3402 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3403 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3404 pv = ba + bdiag[row+1] + 1; 3405 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3406 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3407 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3408 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3409 } 3410 row = *bjtmp++; 3411 } 3412 3413 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3414 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3415 nzi_bl = 0; j = 0; 3416 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3417 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3418 nzi_bl++; j++; 3419 } 3420 nzi_bu = nzi - nzi_bl -1; 3421 while (j < nzi) { 3422 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3423 j++; 3424 } 3425 3426 bjtmp = bj + bi[i]; 3427 batmp = ba + bi[i]; 3428 /* apply level dropping rule to L part */ 3429 ncut = nzi_al + dtcount; 3430 if (ncut < nzi_bl) { 3431 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3432 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3433 } else { 3434 ncut = nzi_bl; 3435 } 3436 for (j=0; j<ncut; j++) { 3437 bjtmp[j] = jtmp[j]; 3438 batmp[j] = vtmp[j]; 3439 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3440 } 3441 bi[i+1] = bi[i] + ncut; 3442 nzi = ncut + 1; 3443 3444 /* apply level dropping rule to U part */ 3445 ncut = nzi_au + dtcount; 3446 if (ncut < nzi_bu) { 3447 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3448 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3449 } else { 3450 ncut = nzi_bu; 3451 } 3452 nzi += ncut; 3453 3454 /* mark bdiagonal */ 3455 bdiag[i+1] = bdiag[i] - (ncut + 1); 3456 bdiag_rev[n-i-1] = bdiag[i+1]; 3457 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3458 bjtmp = bj + bdiag[i]; 3459 batmp = ba + bdiag[i]; 3460 *bjtmp = i; 3461 *batmp = diag_tmp; /* rtmp[i]; */ 3462 if (*batmp == 0.0) { 3463 *batmp = dt+shift; 3464 /* printf(" row %d add shift %g\n",i,shift); */ 3465 } 3466 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3467 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3468 3469 bjtmp = bj + bdiag[i+1]+1; 3470 batmp = ba + bdiag[i+1]+1; 3471 for (k=0; k<ncut; k++) { 3472 bjtmp[k] = jtmp[nzi_bl+1+k]; 3473 batmp[k] = vtmp[nzi_bl+1+k]; 3474 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3475 } 3476 /* printf("\n"); */ 3477 3478 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3479 /* 3480 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3481 printf(" ----------------------------\n"); 3482 */ 3483 } /* for (i=0; i<n; i++) */ 3484 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3485 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3486 3487 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3488 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3489 3490 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3491 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3492 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3493 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3494 3495 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3496 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3497 3498 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3499 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3500 if (row_identity && icol_identity) { 3501 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3502 } else { 3503 B->ops->solve = MatSolve_SeqAIJ; 3504 } 3505 3506 B->ops->solveadd = 0; 3507 B->ops->solvetranspose = 0; 3508 B->ops->solvetransposeadd = 0; 3509 B->ops->matsolve = 0; 3510 B->assembled = PETSC_TRUE; 3511 B->preallocated = PETSC_TRUE; 3512 PetscFunctionReturn(0); 3513 } 3514 3515 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3516 #undef __FUNCT__ 3517 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3518 /* 3519 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3520 */ 3521 3522 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3523 { 3524 PetscErrorCode ierr; 3525 3526 PetscFunctionBegin; 3527 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3528 PetscFunctionReturn(0); 3529 } 3530 3531 /* 3532 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3533 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3534 */ 3535 #undef __FUNCT__ 3536 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3537 /* 3538 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3539 */ 3540 3541 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3542 { 3543 Mat C =fact; 3544 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3545 IS isrow = b->row,isicol = b->icol; 3546 PetscErrorCode ierr; 3547 const PetscInt *r,*ic,*ics; 3548 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3549 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3550 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3551 PetscReal dt=info->dt,shift=info->shiftamount; 3552 PetscBool row_identity, col_identity; 3553 3554 PetscFunctionBegin; 3555 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3556 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3557 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 3558 ics = ic; 3559 3560 for (i=0; i<n; i++) { 3561 /* initialize rtmp array */ 3562 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3563 bjtmp = bj + bi[i]; 3564 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3565 rtmp[i] = 0.0; 3566 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3567 bjtmp = bj + bdiag[i+1] + 1; 3568 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3569 3570 /* load in initial unfactored row of A */ 3571 /* printf("row %d\n",i); */ 3572 nz = ai[r[i]+1] - ai[r[i]]; 3573 ajtmp = aj + ai[r[i]]; 3574 v = aa + ai[r[i]]; 3575 for (j=0; j<nz; j++) { 3576 rtmp[ics[*ajtmp++]] = v[j]; 3577 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3578 } 3579 /* printf("\n"); */ 3580 3581 /* numerical factorization */ 3582 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3583 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3584 k = 0; 3585 while (k < nzl) { 3586 row = *bjtmp++; 3587 /* printf(" prow %d\n",row); */ 3588 pc = rtmp + row; 3589 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3590 multiplier = (*pc) * (*pv); 3591 *pc = multiplier; 3592 if (PetscAbsScalar(multiplier) > dt) { 3593 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3594 pv = b->a + bdiag[row+1] + 1; 3595 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3596 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3597 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3598 } 3599 k++; 3600 } 3601 3602 /* finished row so stick it into b->a */ 3603 /* L-part */ 3604 pv = b->a + bi[i]; 3605 pj = bj + bi[i]; 3606 nzl = bi[i+1] - bi[i]; 3607 for (j=0; j<nzl; j++) { 3608 pv[j] = rtmp[pj[j]]; 3609 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3610 } 3611 3612 /* diagonal: invert diagonal entries for simplier triangular solves */ 3613 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3614 b->a[bdiag[i]] = 1.0/rtmp[i]; 3615 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3616 3617 /* U-part */ 3618 pv = b->a + bdiag[i+1] + 1; 3619 pj = bj + bdiag[i+1] + 1; 3620 nzu = bdiag[i] - bdiag[i+1] - 1; 3621 for (j=0; j<nzu; j++) { 3622 pv[j] = rtmp[pj[j]]; 3623 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3624 } 3625 /* printf("\n"); */ 3626 } 3627 3628 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3629 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3630 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3631 3632 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3633 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3634 if (row_identity && col_identity) { 3635 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3636 } else { 3637 C->ops->solve = MatSolve_SeqAIJ; 3638 } 3639 C->ops->solveadd = 0; 3640 C->ops->solvetranspose = 0; 3641 C->ops->solvetransposeadd = 0; 3642 C->ops->matsolve = 0; 3643 C->assembled = PETSC_TRUE; 3644 C->preallocated = PETSC_TRUE; 3645 3646 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3647 PetscFunctionReturn(0); 3648 } 3649