1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 9 /* 10 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 11 12 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 13 */ 14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscBool *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0.0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 31 else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 96 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 97 { 98 PetscInt n = A->rmap->n; 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 #if defined(PETSC_USE_COMPLEX) 103 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 104 #endif 105 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 106 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 107 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 108 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 109 110 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 111 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 112 113 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 114 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 115 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 116 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 117 118 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 119 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 120 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 121 (*B)->factortype = ftype; 122 123 ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr); 124 ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 130 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 131 { 132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 133 IS isicol; 134 PetscErrorCode ierr; 135 const PetscInt *r,*ic; 136 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 137 PetscInt *bi,*bj,*ajtmp; 138 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 139 PetscReal f; 140 PetscInt nlnk,*lnk,k,**bi_ptr; 141 PetscFreeSpaceList free_space=NULL,current_space=NULL; 142 PetscBT lnkbt; 143 PetscBool missing; 144 145 PetscFunctionBegin; 146 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 147 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 148 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 149 150 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 151 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 152 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 153 154 /* get new row pointers */ 155 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 156 bi[0] = 0; 157 158 /* bdiag is location of diagonal in factor */ 159 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 160 bdiag[0] = 0; 161 162 /* linked list for storing column indices of the active row */ 163 nlnk = n + 1; 164 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 165 166 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 167 168 /* initial FreeSpace size is f*(ai[n]+1) */ 169 f = info->fill; 170 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 171 current_space = free_space; 172 173 for (i=0; i<n; i++) { 174 /* copy previous fill into linked list */ 175 nzi = 0; 176 nnz = ai[r[i]+1] - ai[r[i]]; 177 ajtmp = aj + ai[r[i]]; 178 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 179 nzi += nlnk; 180 181 /* add pivot rows into linked list */ 182 row = lnk[n]; 183 while (row < i) { 184 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 185 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 186 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 187 nzi += nlnk; 188 row = lnk[row]; 189 } 190 bi[i+1] = bi[i] + nzi; 191 im[i] = nzi; 192 193 /* mark bdiag */ 194 nzbd = 0; 195 nnz = nzi; 196 k = lnk[n]; 197 while (nnz-- && k < i) { 198 nzbd++; 199 k = lnk[k]; 200 } 201 bdiag[i] = bi[i] + nzbd; 202 203 /* if free space is not available, make more free space */ 204 if (current_space->local_remaining<nzi) { 205 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 206 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 207 reallocs++; 208 } 209 210 /* copy data into free space, then initialize lnk */ 211 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 212 213 bi_ptr[i] = current_space->array; 214 current_space->array += nzi; 215 current_space->local_used += nzi; 216 current_space->local_remaining -= nzi; 217 } 218 #if defined(PETSC_USE_INFO) 219 if (ai[n] != 0) { 220 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 221 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 222 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 223 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 224 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 225 } else { 226 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 227 } 228 #endif 229 230 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 231 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 232 233 /* destroy list of free space and other temporary array(s) */ 234 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 235 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 236 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 237 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 238 239 /* put together the new matrix */ 240 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 241 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 242 b = (Mat_SeqAIJ*)(B)->data; 243 244 b->free_a = PETSC_TRUE; 245 b->free_ij = PETSC_TRUE; 246 b->singlemalloc = PETSC_FALSE; 247 248 ierr = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr); 249 b->j = bj; 250 b->i = bi; 251 b->diag = bdiag; 252 b->ilen = 0; 253 b->imax = 0; 254 b->row = isrow; 255 b->col = iscol; 256 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 257 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 258 b->icol = isicol; 259 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 260 261 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 262 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 263 b->maxnz = b->nz = bi[n]; 264 265 (B)->factortype = MAT_FACTOR_LU; 266 (B)->info.factor_mallocs = reallocs; 267 (B)->info.fill_ratio_given = f; 268 269 if (ai[n]) { 270 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 271 } else { 272 (B)->info.fill_ratio_needed = 0.0; 273 } 274 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 275 if (a->inode.size) { 276 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 277 } 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 283 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 284 { 285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 286 IS isicol; 287 PetscErrorCode ierr; 288 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 289 PetscInt i,n=A->rmap->n; 290 PetscInt *bi,*bj; 291 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 292 PetscReal f; 293 PetscInt nlnk,*lnk,k,**bi_ptr; 294 PetscFreeSpaceList free_space=NULL,current_space=NULL; 295 PetscBT lnkbt; 296 PetscBool missing; 297 298 PetscFunctionBegin; 299 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 300 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 301 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 302 303 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 304 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 305 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 306 307 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 308 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 309 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 310 bi[0] = bdiag[0] = 0; 311 312 /* linked list for storing column indices of the active row */ 313 nlnk = n + 1; 314 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 315 316 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 317 318 /* initial FreeSpace size is f*(ai[n]+1) */ 319 f = info->fill; 320 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 321 current_space = free_space; 322 323 for (i=0; i<n; i++) { 324 /* copy previous fill into linked list */ 325 nzi = 0; 326 nnz = ai[r[i]+1] - ai[r[i]]; 327 ajtmp = aj + ai[r[i]]; 328 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 329 nzi += nlnk; 330 331 /* add pivot rows into linked list */ 332 row = lnk[n]; 333 while (row < i) { 334 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 335 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 336 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 337 nzi += nlnk; 338 row = lnk[row]; 339 } 340 bi[i+1] = bi[i] + nzi; 341 im[i] = nzi; 342 343 /* mark bdiag */ 344 nzbd = 0; 345 nnz = nzi; 346 k = lnk[n]; 347 while (nnz-- && k < i) { 348 nzbd++; 349 k = lnk[k]; 350 } 351 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 352 353 /* if free space is not available, make more free space */ 354 if (current_space->local_remaining<nzi) { 355 /* estimated additional space needed */ 356 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 357 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 358 reallocs++; 359 } 360 361 /* copy data into free space, then initialize lnk */ 362 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 363 364 bi_ptr[i] = current_space->array; 365 current_space->array += nzi; 366 current_space->local_used += nzi; 367 current_space->local_remaining -= nzi; 368 } 369 370 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 371 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 372 373 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 374 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 375 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 376 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 377 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 381 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 382 b = (Mat_SeqAIJ*)(B)->data; 383 384 b->free_a = PETSC_TRUE; 385 b->free_ij = PETSC_TRUE; 386 b->singlemalloc = PETSC_FALSE; 387 388 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 389 390 b->j = bj; 391 b->i = bi; 392 b->diag = bdiag; 393 b->ilen = 0; 394 b->imax = 0; 395 b->row = isrow; 396 b->col = iscol; 397 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 398 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 399 b->icol = isicol; 400 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 401 402 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 403 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 404 b->maxnz = b->nz = bdiag[0]+1; 405 406 B->factortype = MAT_FACTOR_LU; 407 B->info.factor_mallocs = reallocs; 408 B->info.fill_ratio_given = f; 409 410 if (ai[n]) { 411 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 412 } else { 413 B->info.fill_ratio_needed = 0.0; 414 } 415 #if defined(PETSC_USE_INFO) 416 if (ai[n] != 0) { 417 PetscReal af = B->info.fill_ratio_needed; 418 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 419 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 420 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 421 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 422 } else { 423 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 424 } 425 #endif 426 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 427 if (a->inode.size) { 428 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 429 } 430 ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 /* 435 Trouble in factorization, should we dump the original matrix? 436 */ 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatFactorDumpMatrix" 439 PetscErrorCode MatFactorDumpMatrix(Mat A) 440 { 441 PetscErrorCode ierr; 442 PetscBool flg = PETSC_FALSE; 443 444 PetscFunctionBegin; 445 ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 446 if (flg) { 447 PetscViewer viewer; 448 char filename[PETSC_MAX_PATH_LEN]; 449 450 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 451 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 452 ierr = MatView(A,viewer);CHKERRQ(ierr); 453 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 454 } 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 460 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 461 { 462 Mat C =B; 463 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 464 IS isrow = b->row,isicol = b->icol; 465 PetscErrorCode ierr; 466 const PetscInt *r,*ic,*ics; 467 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 468 PetscInt i,j,k,nz,nzL,row,*pj; 469 const PetscInt *ajtmp,*bjtmp; 470 MatScalar *rtmp,*pc,multiplier,*pv; 471 const MatScalar *aa=a->a,*v; 472 PetscBool row_identity,col_identity; 473 FactorShiftCtx sctx; 474 const PetscInt *ddiag; 475 PetscReal rs; 476 MatScalar d; 477 478 PetscFunctionBegin; 479 /* MatPivotSetUp(): initialize shift context sctx */ 480 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 481 482 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 483 ddiag = a->diag; 484 sctx.shift_top = info->zeropivot; 485 for (i=0; i<n; i++) { 486 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 487 d = (aa)[ddiag[i]]; 488 rs = -PetscAbsScalar(d) - PetscRealPart(d); 489 v = aa+ai[i]; 490 nz = ai[i+1] - ai[i]; 491 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 492 if (rs>sctx.shift_top) sctx.shift_top = rs; 493 } 494 sctx.shift_top *= 1.1; 495 sctx.nshift_max = 5; 496 sctx.shift_lo = 0.; 497 sctx.shift_hi = 1.; 498 } 499 500 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 501 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 502 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 503 ics = ic; 504 505 do { 506 sctx.newshift = PETSC_FALSE; 507 for (i=0; i<n; i++) { 508 /* zero rtmp */ 509 /* L part */ 510 nz = bi[i+1] - bi[i]; 511 bjtmp = bj + bi[i]; 512 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 513 514 /* U part */ 515 nz = bdiag[i]-bdiag[i+1]; 516 bjtmp = bj + bdiag[i+1]+1; 517 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 518 519 /* load in initial (unfactored row) */ 520 nz = ai[r[i]+1] - ai[r[i]]; 521 ajtmp = aj + ai[r[i]]; 522 v = aa + ai[r[i]]; 523 for (j=0; j<nz; j++) { 524 rtmp[ics[ajtmp[j]]] = v[j]; 525 } 526 /* ZeropivotApply() */ 527 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 528 529 /* elimination */ 530 bjtmp = bj + bi[i]; 531 row = *bjtmp++; 532 nzL = bi[i+1] - bi[i]; 533 for (k=0; k < nzL; k++) { 534 pc = rtmp + row; 535 if (*pc != 0.0) { 536 pv = b->a + bdiag[row]; 537 multiplier = *pc * (*pv); 538 *pc = multiplier; 539 540 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 541 pv = b->a + bdiag[row+1]+1; 542 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 543 544 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 545 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 546 } 547 row = *bjtmp++; 548 } 549 550 /* finished row so stick it into b->a */ 551 rs = 0.0; 552 /* L part */ 553 pv = b->a + bi[i]; 554 pj = b->j + bi[i]; 555 nz = bi[i+1] - bi[i]; 556 for (j=0; j<nz; j++) { 557 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 558 } 559 560 /* U part */ 561 pv = b->a + bdiag[i+1]+1; 562 pj = b->j + bdiag[i+1]+1; 563 nz = bdiag[i] - bdiag[i+1]-1; 564 for (j=0; j<nz; j++) { 565 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 566 } 567 568 sctx.rs = rs; 569 sctx.pv = rtmp[i]; 570 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 571 if (sctx.newshift) break; /* break for-loop */ 572 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 573 574 /* Mark diagonal and invert diagonal for simplier triangular solves */ 575 pv = b->a + bdiag[i]; 576 *pv = 1.0/rtmp[i]; 577 578 } /* endof for (i=0; i<n; i++) { */ 579 580 /* MatPivotRefine() */ 581 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 582 /* 583 * if no shift in this attempt & shifting & started shifting & can refine, 584 * then try lower shift 585 */ 586 sctx.shift_hi = sctx.shift_fraction; 587 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 588 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 589 sctx.newshift = PETSC_TRUE; 590 sctx.nshift++; 591 } 592 } while (sctx.newshift); 593 594 ierr = PetscFree(rtmp);CHKERRQ(ierr); 595 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 597 598 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 599 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 600 if (b->inode.size) { 601 C->ops->solve = MatSolve_SeqAIJ_Inode; 602 } else if (row_identity && col_identity) { 603 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 604 } else { 605 C->ops->solve = MatSolve_SeqAIJ; 606 } 607 C->ops->solveadd = MatSolveAdd_SeqAIJ; 608 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 609 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 610 C->ops->matsolve = MatMatSolve_SeqAIJ; 611 C->assembled = PETSC_TRUE; 612 C->preallocated = PETSC_TRUE; 613 614 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 615 616 /* MatShiftView(A,info,&sctx) */ 617 if (sctx.nshift) { 618 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 619 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 620 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 621 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 622 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 623 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 624 } 625 } 626 PetscFunctionReturn(0); 627 } 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 631 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 632 { 633 Mat C =B; 634 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 635 IS isrow = b->row,isicol = b->icol; 636 PetscErrorCode ierr; 637 const PetscInt *r,*ic,*ics; 638 PetscInt nz,row,i,j,n=A->rmap->n,diag; 639 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 640 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 641 MatScalar *pv,*rtmp,*pc,multiplier,d; 642 const MatScalar *v,*aa=a->a; 643 PetscReal rs=0.0; 644 FactorShiftCtx sctx; 645 const PetscInt *ddiag; 646 PetscBool row_identity, col_identity; 647 648 PetscFunctionBegin; 649 /* MatPivotSetUp(): initialize shift context sctx */ 650 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 651 652 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 653 ddiag = a->diag; 654 sctx.shift_top = info->zeropivot; 655 for (i=0; i<n; i++) { 656 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 657 d = (aa)[ddiag[i]]; 658 rs = -PetscAbsScalar(d) - PetscRealPart(d); 659 v = aa+ai[i]; 660 nz = ai[i+1] - ai[i]; 661 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 662 if (rs>sctx.shift_top) sctx.shift_top = rs; 663 } 664 sctx.shift_top *= 1.1; 665 sctx.nshift_max = 5; 666 sctx.shift_lo = 0.; 667 sctx.shift_hi = 1.; 668 } 669 670 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 671 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 672 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 673 ics = ic; 674 675 do { 676 sctx.newshift = PETSC_FALSE; 677 for (i=0; i<n; i++) { 678 nz = bi[i+1] - bi[i]; 679 bjtmp = bj + bi[i]; 680 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 681 682 /* load in initial (unfactored row) */ 683 nz = ai[r[i]+1] - ai[r[i]]; 684 ajtmp = aj + ai[r[i]]; 685 v = aa + ai[r[i]]; 686 for (j=0; j<nz; j++) { 687 rtmp[ics[ajtmp[j]]] = v[j]; 688 } 689 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 690 691 row = *bjtmp++; 692 while (row < i) { 693 pc = rtmp + row; 694 if (*pc != 0.0) { 695 pv = b->a + diag_offset[row]; 696 pj = b->j + diag_offset[row] + 1; 697 multiplier = *pc / *pv++; 698 *pc = multiplier; 699 nz = bi[row+1] - diag_offset[row] - 1; 700 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 701 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 702 } 703 row = *bjtmp++; 704 } 705 /* finished row so stick it into b->a */ 706 pv = b->a + bi[i]; 707 pj = b->j + bi[i]; 708 nz = bi[i+1] - bi[i]; 709 diag = diag_offset[i] - bi[i]; 710 rs = 0.0; 711 for (j=0; j<nz; j++) { 712 pv[j] = rtmp[pj[j]]; 713 rs += PetscAbsScalar(pv[j]); 714 } 715 rs -= PetscAbsScalar(pv[diag]); 716 717 sctx.rs = rs; 718 sctx.pv = pv[diag]; 719 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 720 if (sctx.newshift) break; 721 pv[diag] = sctx.pv; 722 } 723 724 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 725 /* 726 * if no shift in this attempt & shifting & started shifting & can refine, 727 * then try lower shift 728 */ 729 sctx.shift_hi = sctx.shift_fraction; 730 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 731 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 732 sctx.newshift = PETSC_TRUE; 733 sctx.nshift++; 734 } 735 } while (sctx.newshift); 736 737 /* invert diagonal entries for simplier triangular solves */ 738 for (i=0; i<n; i++) { 739 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 740 } 741 ierr = PetscFree(rtmp);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 744 745 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 746 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 747 if (row_identity && col_identity) { 748 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 749 } else { 750 C->ops->solve = MatSolve_SeqAIJ_inplace; 751 } 752 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 753 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 754 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 755 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 756 757 C->assembled = PETSC_TRUE; 758 C->preallocated = PETSC_TRUE; 759 760 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 761 if (sctx.nshift) { 762 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 763 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 764 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 765 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 766 } 767 } 768 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 769 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 770 771 ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 775 /* 776 This routine implements inplace ILU(0) with row or/and column permutations. 777 Input: 778 A - original matrix 779 Output; 780 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 781 a->j (col index) is permuted by the inverse of colperm, then sorted 782 a->a reordered accordingly with a->j 783 a->diag (ptr to diagonal elements) is updated. 784 */ 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 787 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 788 { 789 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 790 IS isrow = a->row,isicol = a->icol; 791 PetscErrorCode ierr; 792 const PetscInt *r,*ic,*ics; 793 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 794 PetscInt *ajtmp,nz,row; 795 PetscInt *diag = a->diag,nbdiag,*pj; 796 PetscScalar *rtmp,*pc,multiplier,d; 797 MatScalar *pv,*v; 798 PetscReal rs; 799 FactorShiftCtx sctx; 800 const MatScalar *aa=a->a,*vtmp; 801 802 PetscFunctionBegin; 803 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 804 805 /* MatPivotSetUp(): initialize shift context sctx */ 806 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 807 808 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 809 const PetscInt *ddiag = a->diag; 810 sctx.shift_top = info->zeropivot; 811 for (i=0; i<n; i++) { 812 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 813 d = (aa)[ddiag[i]]; 814 rs = -PetscAbsScalar(d) - PetscRealPart(d); 815 vtmp = aa+ai[i]; 816 nz = ai[i+1] - ai[i]; 817 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 818 if (rs>sctx.shift_top) sctx.shift_top = rs; 819 } 820 sctx.shift_top *= 1.1; 821 sctx.nshift_max = 5; 822 sctx.shift_lo = 0.; 823 sctx.shift_hi = 1.; 824 } 825 826 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 827 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 828 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 829 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 830 ics = ic; 831 832 #if defined(MV) 833 sctx.shift_top = 0.; 834 sctx.nshift_max = 0; 835 sctx.shift_lo = 0.; 836 sctx.shift_hi = 0.; 837 sctx.shift_fraction = 0.; 838 839 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 840 sctx.shift_top = 0.; 841 for (i=0; i<n; i++) { 842 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 843 d = (a->a)[diag[i]]; 844 rs = -PetscAbsScalar(d) - PetscRealPart(d); 845 v = a->a+ai[i]; 846 nz = ai[i+1] - ai[i]; 847 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 848 if (rs>sctx.shift_top) sctx.shift_top = rs; 849 } 850 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 851 sctx.shift_top *= 1.1; 852 sctx.nshift_max = 5; 853 sctx.shift_lo = 0.; 854 sctx.shift_hi = 1.; 855 } 856 857 sctx.shift_amount = 0.; 858 sctx.nshift = 0; 859 #endif 860 861 do { 862 sctx.newshift = PETSC_FALSE; 863 for (i=0; i<n; i++) { 864 /* load in initial unfactored row */ 865 nz = ai[r[i]+1] - ai[r[i]]; 866 ajtmp = aj + ai[r[i]]; 867 v = a->a + ai[r[i]]; 868 /* sort permuted ajtmp and values v accordingly */ 869 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 870 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 871 872 diag[r[i]] = ai[r[i]]; 873 for (j=0; j<nz; j++) { 874 rtmp[ajtmp[j]] = v[j]; 875 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 876 } 877 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 878 879 row = *ajtmp++; 880 while (row < i) { 881 pc = rtmp + row; 882 if (*pc != 0.0) { 883 pv = a->a + diag[r[row]]; 884 pj = aj + diag[r[row]] + 1; 885 886 multiplier = *pc / *pv++; 887 *pc = multiplier; 888 nz = ai[r[row]+1] - diag[r[row]] - 1; 889 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 890 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 891 } 892 row = *ajtmp++; 893 } 894 /* finished row so overwrite it onto a->a */ 895 pv = a->a + ai[r[i]]; 896 pj = aj + ai[r[i]]; 897 nz = ai[r[i]+1] - ai[r[i]]; 898 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 899 900 rs = 0.0; 901 for (j=0; j<nz; j++) { 902 pv[j] = rtmp[pj[j]]; 903 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 904 } 905 906 sctx.rs = rs; 907 sctx.pv = pv[nbdiag]; 908 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 909 if (sctx.newshift) break; 910 pv[nbdiag] = sctx.pv; 911 } 912 913 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 914 /* 915 * if no shift in this attempt & shifting & started shifting & can refine, 916 * then try lower shift 917 */ 918 sctx.shift_hi = sctx.shift_fraction; 919 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 920 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 921 sctx.newshift = PETSC_TRUE; 922 sctx.nshift++; 923 } 924 } while (sctx.newshift); 925 926 /* invert diagonal entries for simplier triangular solves */ 927 for (i=0; i<n; i++) { 928 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 929 } 930 931 ierr = PetscFree(rtmp);CHKERRQ(ierr); 932 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 933 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 934 935 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 936 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 937 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 938 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 939 940 A->assembled = PETSC_TRUE; 941 A->preallocated = PETSC_TRUE; 942 943 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 944 if (sctx.nshift) { 945 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 946 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 947 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 948 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 949 } 950 } 951 PetscFunctionReturn(0); 952 } 953 954 /* ----------------------------------------------------------- */ 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatLUFactor_SeqAIJ" 957 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 958 { 959 PetscErrorCode ierr; 960 Mat C; 961 962 PetscFunctionBegin; 963 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 964 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 965 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 966 967 A->ops->solve = C->ops->solve; 968 A->ops->solvetranspose = C->ops->solvetranspose; 969 970 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 971 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 /* ----------------------------------------------------------- */ 975 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 979 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 980 { 981 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 982 IS iscol = a->col,isrow = a->row; 983 PetscErrorCode ierr; 984 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 985 PetscInt nz; 986 const PetscInt *rout,*cout,*r,*c; 987 PetscScalar *x,*tmp,*tmps,sum; 988 const PetscScalar *b; 989 const MatScalar *aa = a->a,*v; 990 991 PetscFunctionBegin; 992 if (!n) PetscFunctionReturn(0); 993 994 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 995 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 996 tmp = a->solve_work; 997 998 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 999 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1000 1001 /* forward solve the lower triangular */ 1002 tmp[0] = b[*r++]; 1003 tmps = tmp; 1004 for (i=1; i<n; i++) { 1005 v = aa + ai[i]; 1006 vi = aj + ai[i]; 1007 nz = a->diag[i] - ai[i]; 1008 sum = b[*r++]; 1009 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1010 tmp[i] = sum; 1011 } 1012 1013 /* backward solve the upper triangular */ 1014 for (i=n-1; i>=0; i--) { 1015 v = aa + a->diag[i] + 1; 1016 vi = aj + a->diag[i] + 1; 1017 nz = ai[i+1] - a->diag[i] - 1; 1018 sum = tmp[i]; 1019 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1020 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1021 } 1022 1023 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1024 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1025 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1026 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1027 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1033 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1034 { 1035 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1036 IS iscol = a->col,isrow = a->row; 1037 PetscErrorCode ierr; 1038 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1039 PetscInt nz,neq; 1040 const PetscInt *rout,*cout,*r,*c; 1041 PetscScalar *x,*b,*tmp,*tmps,sum; 1042 const MatScalar *aa = a->a,*v; 1043 PetscBool bisdense,xisdense; 1044 1045 PetscFunctionBegin; 1046 if (!n) PetscFunctionReturn(0); 1047 1048 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1049 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1050 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1051 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1052 1053 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1054 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1055 1056 tmp = a->solve_work; 1057 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1058 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1059 1060 for (neq=0; neq<B->cmap->n; neq++) { 1061 /* forward solve the lower triangular */ 1062 tmp[0] = b[r[0]]; 1063 tmps = tmp; 1064 for (i=1; i<n; i++) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = a->diag[i] - ai[i]; 1068 sum = b[r[i]]; 1069 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1070 tmp[i] = sum; 1071 } 1072 /* backward solve the upper triangular */ 1073 for (i=n-1; i>=0; i--) { 1074 v = aa + a->diag[i] + 1; 1075 vi = aj + a->diag[i] + 1; 1076 nz = ai[i+1] - a->diag[i] - 1; 1077 sum = tmp[i]; 1078 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1079 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1080 } 1081 1082 b += n; 1083 x += n; 1084 } 1085 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1086 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1087 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1088 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1089 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1095 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1096 { 1097 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1098 IS iscol = a->col,isrow = a->row; 1099 PetscErrorCode ierr; 1100 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1101 PetscInt nz,neq; 1102 const PetscInt *rout,*cout,*r,*c; 1103 PetscScalar *x,*b,*tmp,sum; 1104 const MatScalar *aa = a->a,*v; 1105 PetscBool bisdense,xisdense; 1106 1107 PetscFunctionBegin; 1108 if (!n) PetscFunctionReturn(0); 1109 1110 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1111 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1112 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1113 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1114 1115 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1116 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1117 1118 tmp = a->solve_work; 1119 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1120 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1121 1122 for (neq=0; neq<B->cmap->n; neq++) { 1123 /* forward solve the lower triangular */ 1124 tmp[0] = b[r[0]]; 1125 v = aa; 1126 vi = aj; 1127 for (i=1; i<n; i++) { 1128 nz = ai[i+1] - ai[i]; 1129 sum = b[r[i]]; 1130 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1131 tmp[i] = sum; 1132 v += nz; vi += nz; 1133 } 1134 1135 /* backward solve the upper triangular */ 1136 for (i=n-1; i>=0; i--) { 1137 v = aa + adiag[i+1]+1; 1138 vi = aj + adiag[i+1]+1; 1139 nz = adiag[i]-adiag[i+1]-1; 1140 sum = tmp[i]; 1141 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1142 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1143 } 1144 1145 b += n; 1146 x += n; 1147 } 1148 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1149 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1150 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1151 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1152 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1158 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1159 { 1160 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1161 IS iscol = a->col,isrow = a->row; 1162 PetscErrorCode ierr; 1163 const PetscInt *r,*c,*rout,*cout; 1164 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1165 PetscInt nz,row; 1166 PetscScalar *x,*b,*tmp,*tmps,sum; 1167 const MatScalar *aa = a->a,*v; 1168 1169 PetscFunctionBegin; 1170 if (!n) PetscFunctionReturn(0); 1171 1172 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1174 tmp = a->solve_work; 1175 1176 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1177 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1178 1179 /* forward solve the lower triangular */ 1180 tmp[0] = b[*r++]; 1181 tmps = tmp; 1182 for (row=1; row<n; row++) { 1183 i = rout[row]; /* permuted row */ 1184 v = aa + ai[i]; 1185 vi = aj + ai[i]; 1186 nz = a->diag[i] - ai[i]; 1187 sum = b[*r++]; 1188 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1189 tmp[row] = sum; 1190 } 1191 1192 /* backward solve the upper triangular */ 1193 for (row=n-1; row>=0; row--) { 1194 i = rout[row]; /* permuted row */ 1195 v = aa + a->diag[i] + 1; 1196 vi = aj + a->diag[i] + 1; 1197 nz = ai[i+1] - a->diag[i] - 1; 1198 sum = tmp[row]; 1199 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1200 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1201 } 1202 1203 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1204 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1205 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1206 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1207 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 /* ----------------------------------------------------------- */ 1212 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1215 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1216 { 1217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1218 PetscErrorCode ierr; 1219 PetscInt n = A->rmap->n; 1220 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1221 PetscScalar *x; 1222 const PetscScalar *b; 1223 const MatScalar *aa = a->a; 1224 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1225 PetscInt adiag_i,i,nz,ai_i; 1226 const PetscInt *vi; 1227 const MatScalar *v; 1228 PetscScalar sum; 1229 #endif 1230 1231 PetscFunctionBegin; 1232 if (!n) PetscFunctionReturn(0); 1233 1234 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1235 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1236 1237 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1238 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1239 #else 1240 /* forward solve the lower triangular */ 1241 x[0] = b[0]; 1242 for (i=1; i<n; i++) { 1243 ai_i = ai[i]; 1244 v = aa + ai_i; 1245 vi = aj + ai_i; 1246 nz = adiag[i] - ai_i; 1247 sum = b[i]; 1248 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1249 x[i] = sum; 1250 } 1251 1252 /* backward solve the upper triangular */ 1253 for (i=n-1; i>=0; i--) { 1254 adiag_i = adiag[i]; 1255 v = aa + adiag_i + 1; 1256 vi = aj + adiag_i + 1; 1257 nz = ai[i+1] - adiag_i - 1; 1258 sum = x[i]; 1259 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1260 x[i] = sum*aa[adiag_i]; 1261 } 1262 #endif 1263 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1264 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1265 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1271 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1272 { 1273 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1274 IS iscol = a->col,isrow = a->row; 1275 PetscErrorCode ierr; 1276 PetscInt i, n = A->rmap->n,j; 1277 PetscInt nz; 1278 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1279 PetscScalar *x,*tmp,sum; 1280 const PetscScalar *b; 1281 const MatScalar *aa = a->a,*v; 1282 1283 PetscFunctionBegin; 1284 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1285 1286 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1287 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1288 tmp = a->solve_work; 1289 1290 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1291 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1292 1293 /* forward solve the lower triangular */ 1294 tmp[0] = b[*r++]; 1295 for (i=1; i<n; i++) { 1296 v = aa + ai[i]; 1297 vi = aj + ai[i]; 1298 nz = a->diag[i] - ai[i]; 1299 sum = b[*r++]; 1300 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1301 tmp[i] = sum; 1302 } 1303 1304 /* backward solve the upper triangular */ 1305 for (i=n-1; i>=0; i--) { 1306 v = aa + a->diag[i] + 1; 1307 vi = aj + a->diag[i] + 1; 1308 nz = ai[i+1] - a->diag[i] - 1; 1309 sum = tmp[i]; 1310 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1311 tmp[i] = sum*aa[a->diag[i]]; 1312 x[*c--] += tmp[i]; 1313 } 1314 1315 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1316 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1317 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1318 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1319 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1325 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1326 { 1327 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1328 IS iscol = a->col,isrow = a->row; 1329 PetscErrorCode ierr; 1330 PetscInt i, n = A->rmap->n,j; 1331 PetscInt nz; 1332 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1333 PetscScalar *x,*tmp,sum; 1334 const PetscScalar *b; 1335 const MatScalar *aa = a->a,*v; 1336 1337 PetscFunctionBegin; 1338 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1339 1340 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1341 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1342 tmp = a->solve_work; 1343 1344 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1345 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1346 1347 /* forward solve the lower triangular */ 1348 tmp[0] = b[r[0]]; 1349 v = aa; 1350 vi = aj; 1351 for (i=1; i<n; i++) { 1352 nz = ai[i+1] - ai[i]; 1353 sum = b[r[i]]; 1354 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1355 tmp[i] = sum; 1356 v += nz; 1357 vi += nz; 1358 } 1359 1360 /* backward solve the upper triangular */ 1361 v = aa + adiag[n-1]; 1362 vi = aj + adiag[n-1]; 1363 for (i=n-1; i>=0; i--) { 1364 nz = adiag[i] - adiag[i+1] - 1; 1365 sum = tmp[i]; 1366 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1367 tmp[i] = sum*v[nz]; 1368 x[c[i]] += tmp[i]; 1369 v += nz+1; vi += nz+1; 1370 } 1371 1372 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1373 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1374 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1375 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1376 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1382 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1383 { 1384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1385 IS iscol = a->col,isrow = a->row; 1386 PetscErrorCode ierr; 1387 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1388 PetscInt i,n = A->rmap->n,j; 1389 PetscInt nz; 1390 PetscScalar *x,*tmp,s1; 1391 const MatScalar *aa = a->a,*v; 1392 const PetscScalar *b; 1393 1394 PetscFunctionBegin; 1395 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1396 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1397 tmp = a->solve_work; 1398 1399 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1400 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1401 1402 /* copy the b into temp work space according to permutation */ 1403 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1404 1405 /* forward solve the U^T */ 1406 for (i=0; i<n; i++) { 1407 v = aa + diag[i]; 1408 vi = aj + diag[i] + 1; 1409 nz = ai[i+1] - diag[i] - 1; 1410 s1 = tmp[i]; 1411 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1412 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1413 tmp[i] = s1; 1414 } 1415 1416 /* backward solve the L^T */ 1417 for (i=n-1; i>=0; i--) { 1418 v = aa + diag[i] - 1; 1419 vi = aj + diag[i] - 1; 1420 nz = diag[i] - ai[i]; 1421 s1 = tmp[i]; 1422 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1423 } 1424 1425 /* copy tmp into x according to permutation */ 1426 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1427 1428 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1429 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1430 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1431 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1432 1433 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1439 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1440 { 1441 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1442 IS iscol = a->col,isrow = a->row; 1443 PetscErrorCode ierr; 1444 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1445 PetscInt i,n = A->rmap->n,j; 1446 PetscInt nz; 1447 PetscScalar *x,*tmp,s1; 1448 const MatScalar *aa = a->a,*v; 1449 const PetscScalar *b; 1450 1451 PetscFunctionBegin; 1452 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1453 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1454 tmp = a->solve_work; 1455 1456 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1457 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1458 1459 /* copy the b into temp work space according to permutation */ 1460 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1461 1462 /* forward solve the U^T */ 1463 for (i=0; i<n; i++) { 1464 v = aa + adiag[i+1] + 1; 1465 vi = aj + adiag[i+1] + 1; 1466 nz = adiag[i] - adiag[i+1] - 1; 1467 s1 = tmp[i]; 1468 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1469 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1470 tmp[i] = s1; 1471 } 1472 1473 /* backward solve the L^T */ 1474 for (i=n-1; i>=0; i--) { 1475 v = aa + ai[i]; 1476 vi = aj + ai[i]; 1477 nz = ai[i+1] - ai[i]; 1478 s1 = tmp[i]; 1479 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1480 } 1481 1482 /* copy tmp into x according to permutation */ 1483 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1484 1485 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1486 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1487 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1488 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1489 1490 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1496 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1497 { 1498 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1499 IS iscol = a->col,isrow = a->row; 1500 PetscErrorCode ierr; 1501 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1502 PetscInt i,n = A->rmap->n,j; 1503 PetscInt nz; 1504 PetscScalar *x,*tmp,s1; 1505 const MatScalar *aa = a->a,*v; 1506 const PetscScalar *b; 1507 1508 PetscFunctionBegin; 1509 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1510 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1511 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1512 tmp = a->solve_work; 1513 1514 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1515 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1516 1517 /* copy the b into temp work space according to permutation */ 1518 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1519 1520 /* forward solve the U^T */ 1521 for (i=0; i<n; i++) { 1522 v = aa + diag[i]; 1523 vi = aj + diag[i] + 1; 1524 nz = ai[i+1] - diag[i] - 1; 1525 s1 = tmp[i]; 1526 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1527 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1528 tmp[i] = s1; 1529 } 1530 1531 /* backward solve the L^T */ 1532 for (i=n-1; i>=0; i--) { 1533 v = aa + diag[i] - 1; 1534 vi = aj + diag[i] - 1; 1535 nz = diag[i] - ai[i]; 1536 s1 = tmp[i]; 1537 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1538 } 1539 1540 /* copy tmp into x according to permutation */ 1541 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1542 1543 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1544 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1545 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1547 1548 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1554 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1555 { 1556 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1557 IS iscol = a->col,isrow = a->row; 1558 PetscErrorCode ierr; 1559 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1560 PetscInt i,n = A->rmap->n,j; 1561 PetscInt nz; 1562 PetscScalar *x,*tmp,s1; 1563 const MatScalar *aa = a->a,*v; 1564 const PetscScalar *b; 1565 1566 PetscFunctionBegin; 1567 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1568 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1570 tmp = a->solve_work; 1571 1572 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1573 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1574 1575 /* copy the b into temp work space according to permutation */ 1576 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1577 1578 /* forward solve the U^T */ 1579 for (i=0; i<n; i++) { 1580 v = aa + adiag[i+1] + 1; 1581 vi = aj + adiag[i+1] + 1; 1582 nz = adiag[i] - adiag[i+1] - 1; 1583 s1 = tmp[i]; 1584 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1585 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1586 tmp[i] = s1; 1587 } 1588 1589 1590 /* backward solve the L^T */ 1591 for (i=n-1; i>=0; i--) { 1592 v = aa + ai[i]; 1593 vi = aj + ai[i]; 1594 nz = ai[i+1] - ai[i]; 1595 s1 = tmp[i]; 1596 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1597 } 1598 1599 /* copy tmp into x according to permutation */ 1600 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1601 1602 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1603 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1604 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1605 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1606 1607 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1608 PetscFunctionReturn(0); 1609 } 1610 1611 /* ----------------------------------------------------------------*/ 1612 1613 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1614 1615 /* 1616 ilu() under revised new data structure. 1617 Factored arrays bj and ba are stored as 1618 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1619 1620 bi=fact->i is an array of size n+1, in which 1621 bi+ 1622 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1623 bi[n]: points to L(n-1,n-1)+1 1624 1625 bdiag=fact->diag is an array of size n+1,in which 1626 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1627 bdiag[n]: points to entry of U(n-1,0)-1 1628 1629 U(i,:) contains bdiag[i] as its last entry, i.e., 1630 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1631 */ 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1634 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1635 { 1636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1637 PetscErrorCode ierr; 1638 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1639 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1640 IS isicol; 1641 1642 PetscFunctionBegin; 1643 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1644 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1645 b = (Mat_SeqAIJ*)(fact)->data; 1646 1647 /* allocate matrix arrays for new data structure */ 1648 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1649 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1650 1651 b->singlemalloc = PETSC_TRUE; 1652 if (!b->diag) { 1653 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 1654 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1655 } 1656 bdiag = b->diag; 1657 1658 if (n > 0) { 1659 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1660 } 1661 1662 /* set bi and bj with new data structure */ 1663 bi = b->i; 1664 bj = b->j; 1665 1666 /* L part */ 1667 bi[0] = 0; 1668 for (i=0; i<n; i++) { 1669 nz = adiag[i] - ai[i]; 1670 bi[i+1] = bi[i] + nz; 1671 aj = a->j + ai[i]; 1672 for (j=0; j<nz; j++) { 1673 /* *bj = aj[j]; bj++; */ 1674 bj[k++] = aj[j]; 1675 } 1676 } 1677 1678 /* U part */ 1679 bdiag[n] = bi[n]-1; 1680 for (i=n-1; i>=0; i--) { 1681 nz = ai[i+1] - adiag[i] - 1; 1682 aj = a->j + adiag[i] + 1; 1683 for (j=0; j<nz; j++) { 1684 /* *bj = aj[j]; bj++; */ 1685 bj[k++] = aj[j]; 1686 } 1687 /* diag[i] */ 1688 /* *bj = i; bj++; */ 1689 bj[k++] = i; 1690 bdiag[i] = bdiag[i+1] + nz + 1; 1691 } 1692 1693 fact->factortype = MAT_FACTOR_ILU; 1694 fact->info.factor_mallocs = 0; 1695 fact->info.fill_ratio_given = info->fill; 1696 fact->info.fill_ratio_needed = 1.0; 1697 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1698 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1699 1700 b = (Mat_SeqAIJ*)(fact)->data; 1701 b->row = isrow; 1702 b->col = iscol; 1703 b->icol = isicol; 1704 ierr = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1705 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1706 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1712 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1713 { 1714 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1715 IS isicol; 1716 PetscErrorCode ierr; 1717 const PetscInt *r,*ic; 1718 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1719 PetscInt *bi,*cols,nnz,*cols_lvl; 1720 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1721 PetscInt i,levels,diagonal_fill; 1722 PetscBool col_identity,row_identity,missing; 1723 PetscReal f; 1724 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1725 PetscBT lnkbt; 1726 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1727 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1728 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1729 1730 PetscFunctionBegin; 1731 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1732 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1733 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1734 1735 levels = (PetscInt)info->levels; 1736 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1737 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1738 if (!levels && row_identity && col_identity) { 1739 /* special case: ilu(0) with natural ordering */ 1740 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1741 if (a->inode.size) { 1742 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1743 } 1744 PetscFunctionReturn(0); 1745 } 1746 1747 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1748 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1749 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1750 1751 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1752 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1753 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1754 bi[0] = bdiag[0] = 0; 1755 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1756 1757 /* create a linked list for storing column indices of the active row */ 1758 nlnk = n + 1; 1759 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1760 1761 /* initial FreeSpace size is f*(ai[n]+1) */ 1762 f = info->fill; 1763 diagonal_fill = (PetscInt)info->diagonal_fill; 1764 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1765 current_space = free_space; 1766 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1767 current_space_lvl = free_space_lvl; 1768 for (i=0; i<n; i++) { 1769 nzi = 0; 1770 /* copy current row into linked list */ 1771 nnz = ai[r[i]+1] - ai[r[i]]; 1772 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1773 cols = aj + ai[r[i]]; 1774 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1775 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1776 nzi += nlnk; 1777 1778 /* make sure diagonal entry is included */ 1779 if (diagonal_fill && lnk[i] == -1) { 1780 fm = n; 1781 while (lnk[fm] < i) fm = lnk[fm]; 1782 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1783 lnk[fm] = i; 1784 lnk_lvl[i] = 0; 1785 nzi++; dcount++; 1786 } 1787 1788 /* add pivot rows into the active row */ 1789 nzbd = 0; 1790 prow = lnk[n]; 1791 while (prow < i) { 1792 nnz = bdiag[prow]; 1793 cols = bj_ptr[prow] + nnz + 1; 1794 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1795 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1796 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1797 nzi += nlnk; 1798 prow = lnk[prow]; 1799 nzbd++; 1800 } 1801 bdiag[i] = nzbd; 1802 bi[i+1] = bi[i] + nzi; 1803 /* if free space is not available, make more free space */ 1804 if (current_space->local_remaining<nzi) { 1805 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1806 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1807 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1808 reallocs++; 1809 } 1810 1811 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1812 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1813 bj_ptr[i] = current_space->array; 1814 bjlvl_ptr[i] = current_space_lvl->array; 1815 1816 /* make sure the active row i has diagonal entry */ 1817 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1818 1819 current_space->array += nzi; 1820 current_space->local_used += nzi; 1821 current_space->local_remaining -= nzi; 1822 current_space_lvl->array += nzi; 1823 current_space_lvl->local_used += nzi; 1824 current_space_lvl->local_remaining -= nzi; 1825 } 1826 1827 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1828 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1829 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1830 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1831 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1832 1833 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1834 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1835 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1836 1837 #if defined(PETSC_USE_INFO) 1838 { 1839 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1840 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1841 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1842 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1843 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1844 if (diagonal_fill) { 1845 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1846 } 1847 } 1848 #endif 1849 /* put together the new matrix */ 1850 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1851 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1852 b = (Mat_SeqAIJ*)(fact)->data; 1853 1854 b->free_a = PETSC_TRUE; 1855 b->free_ij = PETSC_TRUE; 1856 b->singlemalloc = PETSC_FALSE; 1857 1858 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 1859 1860 b->j = bj; 1861 b->i = bi; 1862 b->diag = bdiag; 1863 b->ilen = 0; 1864 b->imax = 0; 1865 b->row = isrow; 1866 b->col = iscol; 1867 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1868 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1869 b->icol = isicol; 1870 1871 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 1872 /* In b structure: Free imax, ilen, old a, old j. 1873 Allocate bdiag, solve_work, new a, new j */ 1874 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1875 b->maxnz = b->nz = bdiag[0]+1; 1876 1877 (fact)->info.factor_mallocs = reallocs; 1878 (fact)->info.fill_ratio_given = f; 1879 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1880 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1881 if (a->inode.size) { 1882 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1883 } 1884 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1890 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1891 { 1892 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1893 IS isicol; 1894 PetscErrorCode ierr; 1895 const PetscInt *r,*ic; 1896 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1897 PetscInt *bi,*cols,nnz,*cols_lvl; 1898 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1899 PetscInt i,levels,diagonal_fill; 1900 PetscBool col_identity,row_identity; 1901 PetscReal f; 1902 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1903 PetscBT lnkbt; 1904 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1905 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1906 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1907 PetscBool missing; 1908 1909 PetscFunctionBegin; 1910 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1911 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1912 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1913 1914 f = info->fill; 1915 levels = (PetscInt)info->levels; 1916 diagonal_fill = (PetscInt)info->diagonal_fill; 1917 1918 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1919 1920 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1921 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1922 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1923 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1924 1925 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1926 if (a->inode.size) { 1927 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1928 } 1929 fact->factortype = MAT_FACTOR_ILU; 1930 (fact)->info.factor_mallocs = 0; 1931 (fact)->info.fill_ratio_given = info->fill; 1932 (fact)->info.fill_ratio_needed = 1.0; 1933 1934 b = (Mat_SeqAIJ*)(fact)->data; 1935 b->row = isrow; 1936 b->col = iscol; 1937 b->icol = isicol; 1938 ierr = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1939 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1940 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1945 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1946 1947 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1948 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1949 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1950 bi[0] = bdiag[0] = 0; 1951 1952 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1953 1954 /* create a linked list for storing column indices of the active row */ 1955 nlnk = n + 1; 1956 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1957 1958 /* initial FreeSpace size is f*(ai[n]+1) */ 1959 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1960 current_space = free_space; 1961 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1962 current_space_lvl = free_space_lvl; 1963 1964 for (i=0; i<n; i++) { 1965 nzi = 0; 1966 /* copy current row into linked list */ 1967 nnz = ai[r[i]+1] - ai[r[i]]; 1968 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1969 cols = aj + ai[r[i]]; 1970 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1971 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1972 nzi += nlnk; 1973 1974 /* make sure diagonal entry is included */ 1975 if (diagonal_fill && lnk[i] == -1) { 1976 fm = n; 1977 while (lnk[fm] < i) fm = lnk[fm]; 1978 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1979 lnk[fm] = i; 1980 lnk_lvl[i] = 0; 1981 nzi++; dcount++; 1982 } 1983 1984 /* add pivot rows into the active row */ 1985 nzbd = 0; 1986 prow = lnk[n]; 1987 while (prow < i) { 1988 nnz = bdiag[prow]; 1989 cols = bj_ptr[prow] + nnz + 1; 1990 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1991 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1992 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1993 nzi += nlnk; 1994 prow = lnk[prow]; 1995 nzbd++; 1996 } 1997 bdiag[i] = nzbd; 1998 bi[i+1] = bi[i] + nzi; 1999 2000 /* if free space is not available, make more free space */ 2001 if (current_space->local_remaining<nzi) { 2002 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 2003 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2004 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2005 reallocs++; 2006 } 2007 2008 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2009 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2010 bj_ptr[i] = current_space->array; 2011 bjlvl_ptr[i] = current_space_lvl->array; 2012 2013 /* make sure the active row i has diagonal entry */ 2014 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2015 2016 current_space->array += nzi; 2017 current_space->local_used += nzi; 2018 current_space->local_remaining -= nzi; 2019 current_space_lvl->array += nzi; 2020 current_space_lvl->local_used += nzi; 2021 current_space_lvl->local_remaining -= nzi; 2022 } 2023 2024 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2025 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2026 2027 /* destroy list of free space and other temporary arrays */ 2028 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 2029 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2030 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2031 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2032 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2033 2034 #if defined(PETSC_USE_INFO) 2035 { 2036 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2037 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2038 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2039 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2040 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2041 if (diagonal_fill) { 2042 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 2043 } 2044 } 2045 #endif 2046 2047 /* put together the new matrix */ 2048 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2049 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2050 b = (Mat_SeqAIJ*)(fact)->data; 2051 2052 b->free_a = PETSC_TRUE; 2053 b->free_ij = PETSC_TRUE; 2054 b->singlemalloc = PETSC_FALSE; 2055 2056 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2057 b->j = bj; 2058 b->i = bi; 2059 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2060 b->diag = bdiag; 2061 b->ilen = 0; 2062 b->imax = 0; 2063 b->row = isrow; 2064 b->col = iscol; 2065 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2066 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2067 b->icol = isicol; 2068 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 2069 /* In b structure: Free imax, ilen, old a, old j. 2070 Allocate bdiag, solve_work, new a, new j */ 2071 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2072 b->maxnz = b->nz = bi[n]; 2073 2074 (fact)->info.factor_mallocs = reallocs; 2075 (fact)->info.fill_ratio_given = f; 2076 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2077 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2078 if (a->inode.size) { 2079 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2086 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2087 { 2088 Mat C = B; 2089 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2090 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2091 IS ip=b->row,iip = b->icol; 2092 PetscErrorCode ierr; 2093 const PetscInt *rip,*riip; 2094 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2095 PetscInt *ai=a->i,*aj=a->j; 2096 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2097 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2098 PetscBool perm_identity; 2099 FactorShiftCtx sctx; 2100 PetscReal rs; 2101 MatScalar d,*v; 2102 2103 PetscFunctionBegin; 2104 /* MatPivotSetUp(): initialize shift context sctx */ 2105 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2106 2107 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2108 sctx.shift_top = info->zeropivot; 2109 for (i=0; i<mbs; i++) { 2110 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2111 d = (aa)[a->diag[i]]; 2112 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2113 v = aa+ai[i]; 2114 nz = ai[i+1] - ai[i]; 2115 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2116 if (rs>sctx.shift_top) sctx.shift_top = rs; 2117 } 2118 sctx.shift_top *= 1.1; 2119 sctx.nshift_max = 5; 2120 sctx.shift_lo = 0.; 2121 sctx.shift_hi = 1.; 2122 } 2123 2124 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2125 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2126 2127 /* allocate working arrays 2128 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2129 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2130 */ 2131 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2132 2133 do { 2134 sctx.newshift = PETSC_FALSE; 2135 2136 for (i=0; i<mbs; i++) c2r[i] = mbs; 2137 if (mbs) il[0] = 0; 2138 2139 for (k = 0; k<mbs; k++) { 2140 /* zero rtmp */ 2141 nz = bi[k+1] - bi[k]; 2142 bjtmp = bj + bi[k]; 2143 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2144 2145 /* load in initial unfactored row */ 2146 bval = ba + bi[k]; 2147 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2148 for (j = jmin; j < jmax; j++) { 2149 col = riip[aj[j]]; 2150 if (col >= k) { /* only take upper triangular entry */ 2151 rtmp[col] = aa[j]; 2152 *bval++ = 0.0; /* for in-place factorization */ 2153 } 2154 } 2155 /* shift the diagonal of the matrix: ZeropivotApply() */ 2156 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2157 2158 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2159 dk = rtmp[k]; 2160 i = c2r[k]; /* first row to be added to k_th row */ 2161 2162 while (i < k) { 2163 nexti = c2r[i]; /* next row to be added to k_th row */ 2164 2165 /* compute multiplier, update diag(k) and U(i,k) */ 2166 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2167 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2168 dk += uikdi*ba[ili]; /* update diag[k] */ 2169 ba[ili] = uikdi; /* -U(i,k) */ 2170 2171 /* add multiple of row i to k-th row */ 2172 jmin = ili + 1; jmax = bi[i+1]; 2173 if (jmin < jmax) { 2174 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2175 /* update il and c2r for row i */ 2176 il[i] = jmin; 2177 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2178 } 2179 i = nexti; 2180 } 2181 2182 /* copy data into U(k,:) */ 2183 rs = 0.0; 2184 jmin = bi[k]; jmax = bi[k+1]-1; 2185 if (jmin < jmax) { 2186 for (j=jmin; j<jmax; j++) { 2187 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2188 } 2189 /* add the k-th row into il and c2r */ 2190 il[k] = jmin; 2191 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2192 } 2193 2194 /* MatPivotCheck() */ 2195 sctx.rs = rs; 2196 sctx.pv = dk; 2197 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 2198 if (sctx.newshift) break; 2199 dk = sctx.pv; 2200 2201 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2202 } 2203 } while (sctx.newshift); 2204 2205 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2206 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2207 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2208 2209 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2210 if (perm_identity) { 2211 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2212 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2213 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2214 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2215 } else { 2216 B->ops->solve = MatSolve_SeqSBAIJ_1; 2217 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2218 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2219 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2220 } 2221 2222 C->assembled = PETSC_TRUE; 2223 C->preallocated = PETSC_TRUE; 2224 2225 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2226 2227 /* MatPivotView() */ 2228 if (sctx.nshift) { 2229 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2230 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2231 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2232 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2233 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2234 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2235 } 2236 } 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2242 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2243 { 2244 Mat C = B; 2245 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2246 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2247 IS ip=b->row,iip = b->icol; 2248 PetscErrorCode ierr; 2249 const PetscInt *rip,*riip; 2250 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2251 PetscInt *ai=a->i,*aj=a->j; 2252 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2253 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2254 PetscBool perm_identity; 2255 FactorShiftCtx sctx; 2256 PetscReal rs; 2257 MatScalar d,*v; 2258 2259 PetscFunctionBegin; 2260 /* MatPivotSetUp(): initialize shift context sctx */ 2261 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2262 2263 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2264 sctx.shift_top = info->zeropivot; 2265 for (i=0; i<mbs; i++) { 2266 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2267 d = (aa)[a->diag[i]]; 2268 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2269 v = aa+ai[i]; 2270 nz = ai[i+1] - ai[i]; 2271 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2272 if (rs>sctx.shift_top) sctx.shift_top = rs; 2273 } 2274 sctx.shift_top *= 1.1; 2275 sctx.nshift_max = 5; 2276 sctx.shift_lo = 0.; 2277 sctx.shift_hi = 1.; 2278 } 2279 2280 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2281 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2282 2283 /* initialization */ 2284 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2285 2286 do { 2287 sctx.newshift = PETSC_FALSE; 2288 2289 for (i=0; i<mbs; i++) jl[i] = mbs; 2290 il[0] = 0; 2291 2292 for (k = 0; k<mbs; k++) { 2293 /* zero rtmp */ 2294 nz = bi[k+1] - bi[k]; 2295 bjtmp = bj + bi[k]; 2296 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2297 2298 bval = ba + bi[k]; 2299 /* initialize k-th row by the perm[k]-th row of A */ 2300 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2301 for (j = jmin; j < jmax; j++) { 2302 col = riip[aj[j]]; 2303 if (col >= k) { /* only take upper triangular entry */ 2304 rtmp[col] = aa[j]; 2305 *bval++ = 0.0; /* for in-place factorization */ 2306 } 2307 } 2308 /* shift the diagonal of the matrix */ 2309 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2310 2311 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2312 dk = rtmp[k]; 2313 i = jl[k]; /* first row to be added to k_th row */ 2314 2315 while (i < k) { 2316 nexti = jl[i]; /* next row to be added to k_th row */ 2317 2318 /* compute multiplier, update diag(k) and U(i,k) */ 2319 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2320 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2321 dk += uikdi*ba[ili]; 2322 ba[ili] = uikdi; /* -U(i,k) */ 2323 2324 /* add multiple of row i to k-th row */ 2325 jmin = ili + 1; jmax = bi[i+1]; 2326 if (jmin < jmax) { 2327 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2328 /* update il and jl for row i */ 2329 il[i] = jmin; 2330 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2331 } 2332 i = nexti; 2333 } 2334 2335 /* shift the diagonals when zero pivot is detected */ 2336 /* compute rs=sum of abs(off-diagonal) */ 2337 rs = 0.0; 2338 jmin = bi[k]+1; 2339 nz = bi[k+1] - jmin; 2340 bcol = bj + jmin; 2341 for (j=0; j<nz; j++) { 2342 rs += PetscAbsScalar(rtmp[bcol[j]]); 2343 } 2344 2345 sctx.rs = rs; 2346 sctx.pv = dk; 2347 ierr = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr); 2348 if (sctx.newshift) break; 2349 dk = sctx.pv; 2350 2351 /* copy data into U(k,:) */ 2352 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2353 jmin = bi[k]+1; jmax = bi[k+1]; 2354 if (jmin < jmax) { 2355 for (j=jmin; j<jmax; j++) { 2356 col = bj[j]; ba[j] = rtmp[col]; 2357 } 2358 /* add the k-th row into il and jl */ 2359 il[k] = jmin; 2360 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2361 } 2362 } 2363 } while (sctx.newshift); 2364 2365 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2366 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2367 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2368 2369 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2370 if (perm_identity) { 2371 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2372 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2373 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2374 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2375 } else { 2376 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2377 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2378 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2379 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2380 } 2381 2382 C->assembled = PETSC_TRUE; 2383 C->preallocated = PETSC_TRUE; 2384 2385 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2386 if (sctx.nshift) { 2387 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2388 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2389 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2390 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2391 } 2392 } 2393 PetscFunctionReturn(0); 2394 } 2395 2396 /* 2397 icc() under revised new data structure. 2398 Factored arrays bj and ba are stored as 2399 U(0,:),...,U(i,:),U(n-1,:) 2400 2401 ui=fact->i is an array of size n+1, in which 2402 ui+ 2403 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2404 ui[n]: points to U(n-1,n-1)+1 2405 2406 udiag=fact->diag is an array of size n,in which 2407 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2408 2409 U(i,:) contains udiag[i] as its last entry, i.e., 2410 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2411 */ 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2415 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2416 { 2417 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2418 Mat_SeqSBAIJ *b; 2419 PetscErrorCode ierr; 2420 PetscBool perm_identity,missing; 2421 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2422 const PetscInt *rip,*riip; 2423 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2424 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2425 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2426 PetscReal fill =info->fill,levels=info->levels; 2427 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2428 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2429 PetscBT lnkbt; 2430 IS iperm; 2431 2432 PetscFunctionBegin; 2433 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2434 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2435 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2436 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2437 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2438 2439 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2440 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2441 ui[0] = 0; 2442 2443 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2444 if (!levels && perm_identity) { 2445 for (i=0; i<am; i++) { 2446 ncols = ai[i+1] - a->diag[i]; 2447 ui[i+1] = ui[i] + ncols; 2448 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2449 } 2450 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2451 cols = uj; 2452 for (i=0; i<am; i++) { 2453 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2454 ncols = ai[i+1] - a->diag[i] -1; 2455 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2456 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2457 } 2458 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2459 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2460 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2461 2462 /* initialization */ 2463 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2464 2465 /* jl: linked list for storing indices of the pivot rows 2466 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2467 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2468 for (i=0; i<am; i++) { 2469 jl[i] = am; il[i] = 0; 2470 } 2471 2472 /* create and initialize a linked list for storing column indices of the active row k */ 2473 nlnk = am + 1; 2474 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2475 2476 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2477 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2478 current_space = free_space; 2479 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2480 current_space_lvl = free_space_lvl; 2481 2482 for (k=0; k<am; k++) { /* for each active row k */ 2483 /* initialize lnk by the column indices of row rip[k] of A */ 2484 nzk = 0; 2485 ncols = ai[rip[k]+1] - ai[rip[k]]; 2486 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2487 ncols_upper = 0; 2488 for (j=0; j<ncols; j++) { 2489 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2490 if (riip[i] >= k) { /* only take upper triangular entry */ 2491 ajtmp[ncols_upper] = i; 2492 ncols_upper++; 2493 } 2494 } 2495 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2496 nzk += nlnk; 2497 2498 /* update lnk by computing fill-in for each pivot row to be merged in */ 2499 prow = jl[k]; /* 1st pivot row */ 2500 2501 while (prow < k) { 2502 nextprow = jl[prow]; 2503 2504 /* merge prow into k-th row */ 2505 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2506 jmax = ui[prow+1]; 2507 ncols = jmax-jmin; 2508 i = jmin - ui[prow]; 2509 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2510 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2511 j = *(uj - 1); 2512 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2513 nzk += nlnk; 2514 2515 /* update il and jl for prow */ 2516 if (jmin < jmax) { 2517 il[prow] = jmin; 2518 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2519 } 2520 prow = nextprow; 2521 } 2522 2523 /* if free space is not available, make more free space */ 2524 if (current_space->local_remaining<nzk) { 2525 i = am - k + 1; /* num of unfactored rows */ 2526 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2527 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2528 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2529 reallocs++; 2530 } 2531 2532 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2533 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2534 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2535 2536 /* add the k-th row into il and jl */ 2537 if (nzk > 1) { 2538 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2539 jl[k] = jl[i]; jl[i] = k; 2540 il[k] = ui[k] + 1; 2541 } 2542 uj_ptr[k] = current_space->array; 2543 uj_lvl_ptr[k] = current_space_lvl->array; 2544 2545 current_space->array += nzk; 2546 current_space->local_used += nzk; 2547 current_space->local_remaining -= nzk; 2548 2549 current_space_lvl->array += nzk; 2550 current_space_lvl->local_used += nzk; 2551 current_space_lvl->local_remaining -= nzk; 2552 2553 ui[k+1] = ui[k] + nzk; 2554 } 2555 2556 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2557 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2558 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2559 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2560 2561 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2562 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2563 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2564 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2565 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2566 2567 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2568 2569 /* put together the new matrix in MATSEQSBAIJ format */ 2570 b = (Mat_SeqSBAIJ*)(fact)->data; 2571 b->singlemalloc = PETSC_FALSE; 2572 2573 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2574 2575 b->j = uj; 2576 b->i = ui; 2577 b->diag = udiag; 2578 b->free_diag = PETSC_TRUE; 2579 b->ilen = 0; 2580 b->imax = 0; 2581 b->row = perm; 2582 b->col = perm; 2583 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2584 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2585 b->icol = iperm; 2586 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2587 2588 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2589 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2590 2591 b->maxnz = b->nz = ui[am]; 2592 b->free_a = PETSC_TRUE; 2593 b->free_ij = PETSC_TRUE; 2594 2595 fact->info.factor_mallocs = reallocs; 2596 fact->info.fill_ratio_given = fill; 2597 if (ai[am] != 0) { 2598 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2599 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2600 } else { 2601 fact->info.fill_ratio_needed = 0.0; 2602 } 2603 #if defined(PETSC_USE_INFO) 2604 if (ai[am] != 0) { 2605 PetscReal af = fact->info.fill_ratio_needed; 2606 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2607 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2608 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2609 } else { 2610 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2611 } 2612 #endif 2613 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2614 PetscFunctionReturn(0); 2615 } 2616 2617 #undef __FUNCT__ 2618 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2619 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2620 { 2621 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2622 Mat_SeqSBAIJ *b; 2623 PetscErrorCode ierr; 2624 PetscBool perm_identity,missing; 2625 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2626 const PetscInt *rip,*riip; 2627 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2628 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2629 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2630 PetscReal fill =info->fill,levels=info->levels; 2631 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2632 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2633 PetscBT lnkbt; 2634 IS iperm; 2635 2636 PetscFunctionBegin; 2637 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2638 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2639 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2640 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2641 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2642 2643 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2644 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2645 ui[0] = 0; 2646 2647 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2648 if (!levels && perm_identity) { 2649 2650 for (i=0; i<am; i++) { 2651 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2652 udiag[i] = ui[i]; 2653 } 2654 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2655 cols = uj; 2656 for (i=0; i<am; i++) { 2657 aj = a->j + a->diag[i]; 2658 ncols = ui[i+1] - ui[i]; 2659 for (j=0; j<ncols; j++) *cols++ = *aj++; 2660 } 2661 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2662 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2663 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2664 2665 /* initialization */ 2666 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2667 2668 /* jl: linked list for storing indices of the pivot rows 2669 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2670 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2671 for (i=0; i<am; i++) { 2672 jl[i] = am; il[i] = 0; 2673 } 2674 2675 /* create and initialize a linked list for storing column indices of the active row k */ 2676 nlnk = am + 1; 2677 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2678 2679 /* initial FreeSpace size is fill*(ai[am]+1) */ 2680 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2681 current_space = free_space; 2682 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr); 2683 current_space_lvl = free_space_lvl; 2684 2685 for (k=0; k<am; k++) { /* for each active row k */ 2686 /* initialize lnk by the column indices of row rip[k] of A */ 2687 nzk = 0; 2688 ncols = ai[rip[k]+1] - ai[rip[k]]; 2689 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2690 ncols_upper = 0; 2691 for (j=0; j<ncols; j++) { 2692 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2693 if (riip[i] >= k) { /* only take upper triangular entry */ 2694 ajtmp[ncols_upper] = i; 2695 ncols_upper++; 2696 } 2697 } 2698 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2699 nzk += nlnk; 2700 2701 /* update lnk by computing fill-in for each pivot row to be merged in */ 2702 prow = jl[k]; /* 1st pivot row */ 2703 2704 while (prow < k) { 2705 nextprow = jl[prow]; 2706 2707 /* merge prow into k-th row */ 2708 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2709 jmax = ui[prow+1]; 2710 ncols = jmax-jmin; 2711 i = jmin - ui[prow]; 2712 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2713 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2714 j = *(uj - 1); 2715 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2716 nzk += nlnk; 2717 2718 /* update il and jl for prow */ 2719 if (jmin < jmax) { 2720 il[prow] = jmin; 2721 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2722 } 2723 prow = nextprow; 2724 } 2725 2726 /* if free space is not available, make more free space */ 2727 if (current_space->local_remaining<nzk) { 2728 i = am - k + 1; /* num of unfactored rows */ 2729 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2730 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2731 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2732 reallocs++; 2733 } 2734 2735 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2736 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2737 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2738 2739 /* add the k-th row into il and jl */ 2740 if (nzk > 1) { 2741 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2742 jl[k] = jl[i]; jl[i] = k; 2743 il[k] = ui[k] + 1; 2744 } 2745 uj_ptr[k] = current_space->array; 2746 uj_lvl_ptr[k] = current_space_lvl->array; 2747 2748 current_space->array += nzk; 2749 current_space->local_used += nzk; 2750 current_space->local_remaining -= nzk; 2751 2752 current_space_lvl->array += nzk; 2753 current_space_lvl->local_used += nzk; 2754 current_space_lvl->local_remaining -= nzk; 2755 2756 ui[k+1] = ui[k] + nzk; 2757 } 2758 2759 #if defined(PETSC_USE_INFO) 2760 if (ai[am] != 0) { 2761 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2762 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2763 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2764 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2765 } else { 2766 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2767 } 2768 #endif 2769 2770 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2771 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2772 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2773 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2774 2775 /* destroy list of free space and other temporary array(s) */ 2776 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2777 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2778 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2779 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2780 2781 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2782 2783 /* put together the new matrix in MATSEQSBAIJ format */ 2784 2785 b = (Mat_SeqSBAIJ*)fact->data; 2786 b->singlemalloc = PETSC_FALSE; 2787 2788 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2789 2790 b->j = uj; 2791 b->i = ui; 2792 b->diag = udiag; 2793 b->free_diag = PETSC_TRUE; 2794 b->ilen = 0; 2795 b->imax = 0; 2796 b->row = perm; 2797 b->col = perm; 2798 2799 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2800 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2801 2802 b->icol = iperm; 2803 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2804 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2805 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2806 b->maxnz = b->nz = ui[am]; 2807 b->free_a = PETSC_TRUE; 2808 b->free_ij = PETSC_TRUE; 2809 2810 fact->info.factor_mallocs = reallocs; 2811 fact->info.fill_ratio_given = fill; 2812 if (ai[am] != 0) { 2813 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2814 } else { 2815 fact->info.fill_ratio_needed = 0.0; 2816 } 2817 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2818 PetscFunctionReturn(0); 2819 } 2820 2821 #undef __FUNCT__ 2822 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2823 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2824 { 2825 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2826 Mat_SeqSBAIJ *b; 2827 PetscErrorCode ierr; 2828 PetscBool perm_identity,missing; 2829 PetscReal fill = info->fill; 2830 const PetscInt *rip,*riip; 2831 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2832 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2833 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2834 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2835 PetscBT lnkbt; 2836 IS iperm; 2837 2838 PetscFunctionBegin; 2839 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2840 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2841 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2842 2843 /* check whether perm is the identity mapping */ 2844 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2845 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2846 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2847 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2848 2849 /* initialization */ 2850 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2851 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2852 ui[0] = 0; 2853 2854 /* jl: linked list for storing indices of the pivot rows 2855 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2856 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2857 for (i=0; i<am; i++) { 2858 jl[i] = am; il[i] = 0; 2859 } 2860 2861 /* create and initialize a linked list for storing column indices of the active row k */ 2862 nlnk = am + 1; 2863 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2864 2865 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2866 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2867 current_space = free_space; 2868 2869 for (k=0; k<am; k++) { /* for each active row k */ 2870 /* initialize lnk by the column indices of row rip[k] of A */ 2871 nzk = 0; 2872 ncols = ai[rip[k]+1] - ai[rip[k]]; 2873 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2874 ncols_upper = 0; 2875 for (j=0; j<ncols; j++) { 2876 i = riip[*(aj + ai[rip[k]] + j)]; 2877 if (i >= k) { /* only take upper triangular entry */ 2878 cols[ncols_upper] = i; 2879 ncols_upper++; 2880 } 2881 } 2882 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2883 nzk += nlnk; 2884 2885 /* update lnk by computing fill-in for each pivot row to be merged in */ 2886 prow = jl[k]; /* 1st pivot row */ 2887 2888 while (prow < k) { 2889 nextprow = jl[prow]; 2890 /* merge prow into k-th row */ 2891 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2892 jmax = ui[prow+1]; 2893 ncols = jmax-jmin; 2894 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2895 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2896 nzk += nlnk; 2897 2898 /* update il and jl for prow */ 2899 if (jmin < jmax) { 2900 il[prow] = jmin; 2901 j = *uj_ptr; 2902 jl[prow] = jl[j]; 2903 jl[j] = prow; 2904 } 2905 prow = nextprow; 2906 } 2907 2908 /* if free space is not available, make more free space */ 2909 if (current_space->local_remaining<nzk) { 2910 i = am - k + 1; /* num of unfactored rows */ 2911 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2912 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2913 reallocs++; 2914 } 2915 2916 /* copy data into free space, then initialize lnk */ 2917 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2918 2919 /* add the k-th row into il and jl */ 2920 if (nzk > 1) { 2921 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2922 jl[k] = jl[i]; jl[i] = k; 2923 il[k] = ui[k] + 1; 2924 } 2925 ui_ptr[k] = current_space->array; 2926 2927 current_space->array += nzk; 2928 current_space->local_used += nzk; 2929 current_space->local_remaining -= nzk; 2930 2931 ui[k+1] = ui[k] + nzk; 2932 } 2933 2934 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2935 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2936 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2937 2938 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2939 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2940 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2941 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2942 2943 /* put together the new matrix in MATSEQSBAIJ format */ 2944 2945 b = (Mat_SeqSBAIJ*)fact->data; 2946 b->singlemalloc = PETSC_FALSE; 2947 b->free_a = PETSC_TRUE; 2948 b->free_ij = PETSC_TRUE; 2949 2950 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2951 2952 b->j = uj; 2953 b->i = ui; 2954 b->diag = udiag; 2955 b->free_diag = PETSC_TRUE; 2956 b->ilen = 0; 2957 b->imax = 0; 2958 b->row = perm; 2959 b->col = perm; 2960 2961 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2962 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2963 2964 b->icol = iperm; 2965 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2966 2967 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2968 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2969 2970 b->maxnz = b->nz = ui[am]; 2971 2972 fact->info.factor_mallocs = reallocs; 2973 fact->info.fill_ratio_given = fill; 2974 if (ai[am] != 0) { 2975 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2976 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2977 } else { 2978 fact->info.fill_ratio_needed = 0.0; 2979 } 2980 #if defined(PETSC_USE_INFO) 2981 if (ai[am] != 0) { 2982 PetscReal af = fact->info.fill_ratio_needed; 2983 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2984 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2985 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2986 } else { 2987 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2988 } 2989 #endif 2990 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2991 PetscFunctionReturn(0); 2992 } 2993 2994 #undef __FUNCT__ 2995 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2996 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2997 { 2998 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2999 Mat_SeqSBAIJ *b; 3000 PetscErrorCode ierr; 3001 PetscBool perm_identity,missing; 3002 PetscReal fill = info->fill; 3003 const PetscInt *rip,*riip; 3004 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3005 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3006 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3007 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3008 PetscBT lnkbt; 3009 IS iperm; 3010 3011 PetscFunctionBegin; 3012 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3013 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3014 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3015 3016 /* check whether perm is the identity mapping */ 3017 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3018 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3019 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3020 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3021 3022 /* initialization */ 3023 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 3024 ui[0] = 0; 3025 3026 /* jl: linked list for storing indices of the pivot rows 3027 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3028 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 3029 for (i=0; i<am; i++) { 3030 jl[i] = am; il[i] = 0; 3031 } 3032 3033 /* create and initialize a linked list for storing column indices of the active row k */ 3034 nlnk = am + 1; 3035 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3036 3037 /* initial FreeSpace size is fill*(ai[am]+1) */ 3038 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 3039 current_space = free_space; 3040 3041 for (k=0; k<am; k++) { /* for each active row k */ 3042 /* initialize lnk by the column indices of row rip[k] of A */ 3043 nzk = 0; 3044 ncols = ai[rip[k]+1] - ai[rip[k]]; 3045 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3046 ncols_upper = 0; 3047 for (j=0; j<ncols; j++) { 3048 i = riip[*(aj + ai[rip[k]] + j)]; 3049 if (i >= k) { /* only take upper triangular entry */ 3050 cols[ncols_upper] = i; 3051 ncols_upper++; 3052 } 3053 } 3054 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3055 nzk += nlnk; 3056 3057 /* update lnk by computing fill-in for each pivot row to be merged in */ 3058 prow = jl[k]; /* 1st pivot row */ 3059 3060 while (prow < k) { 3061 nextprow = jl[prow]; 3062 /* merge prow into k-th row */ 3063 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3064 jmax = ui[prow+1]; 3065 ncols = jmax-jmin; 3066 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3067 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3068 nzk += nlnk; 3069 3070 /* update il and jl for prow */ 3071 if (jmin < jmax) { 3072 il[prow] = jmin; 3073 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3074 } 3075 prow = nextprow; 3076 } 3077 3078 /* if free space is not available, make more free space */ 3079 if (current_space->local_remaining<nzk) { 3080 i = am - k + 1; /* num of unfactored rows */ 3081 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3082 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3083 reallocs++; 3084 } 3085 3086 /* copy data into free space, then initialize lnk */ 3087 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3088 3089 /* add the k-th row into il and jl */ 3090 if (nzk-1 > 0) { 3091 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3092 jl[k] = jl[i]; jl[i] = k; 3093 il[k] = ui[k] + 1; 3094 } 3095 ui_ptr[k] = current_space->array; 3096 3097 current_space->array += nzk; 3098 current_space->local_used += nzk; 3099 current_space->local_remaining -= nzk; 3100 3101 ui[k+1] = ui[k] + nzk; 3102 } 3103 3104 #if defined(PETSC_USE_INFO) 3105 if (ai[am] != 0) { 3106 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3107 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3108 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3109 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3110 } else { 3111 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3112 } 3113 #endif 3114 3115 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3116 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3117 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3118 3119 /* destroy list of free space and other temporary array(s) */ 3120 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 3121 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3122 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3123 3124 /* put together the new matrix in MATSEQSBAIJ format */ 3125 3126 b = (Mat_SeqSBAIJ*)fact->data; 3127 b->singlemalloc = PETSC_FALSE; 3128 b->free_a = PETSC_TRUE; 3129 b->free_ij = PETSC_TRUE; 3130 3131 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 3132 3133 b->j = uj; 3134 b->i = ui; 3135 b->diag = 0; 3136 b->ilen = 0; 3137 b->imax = 0; 3138 b->row = perm; 3139 b->col = perm; 3140 3141 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3142 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3143 3144 b->icol = iperm; 3145 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3146 3147 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 3148 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3149 b->maxnz = b->nz = ui[am]; 3150 3151 fact->info.factor_mallocs = reallocs; 3152 fact->info.fill_ratio_given = fill; 3153 if (ai[am] != 0) { 3154 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3155 } else { 3156 fact->info.fill_ratio_needed = 0.0; 3157 } 3158 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3159 PetscFunctionReturn(0); 3160 } 3161 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3164 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3165 { 3166 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3167 PetscErrorCode ierr; 3168 PetscInt n = A->rmap->n; 3169 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3170 PetscScalar *x,sum; 3171 const PetscScalar *b; 3172 const MatScalar *aa = a->a,*v; 3173 PetscInt i,nz; 3174 3175 PetscFunctionBegin; 3176 if (!n) PetscFunctionReturn(0); 3177 3178 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3179 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3180 3181 /* forward solve the lower triangular */ 3182 x[0] = b[0]; 3183 v = aa; 3184 vi = aj; 3185 for (i=1; i<n; i++) { 3186 nz = ai[i+1] - ai[i]; 3187 sum = b[i]; 3188 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3189 v += nz; 3190 vi += nz; 3191 x[i] = sum; 3192 } 3193 3194 /* backward solve the upper triangular */ 3195 for (i=n-1; i>=0; i--) { 3196 v = aa + adiag[i+1] + 1; 3197 vi = aj + adiag[i+1] + 1; 3198 nz = adiag[i] - adiag[i+1]-1; 3199 sum = x[i]; 3200 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3201 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3202 } 3203 3204 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3205 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3206 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3207 PetscFunctionReturn(0); 3208 } 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "MatSolve_SeqAIJ" 3212 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3213 { 3214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3215 IS iscol = a->col,isrow = a->row; 3216 PetscErrorCode ierr; 3217 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3218 const PetscInt *rout,*cout,*r,*c; 3219 PetscScalar *x,*tmp,sum; 3220 const PetscScalar *b; 3221 const MatScalar *aa = a->a,*v; 3222 3223 PetscFunctionBegin; 3224 if (!n) PetscFunctionReturn(0); 3225 3226 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3227 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3228 tmp = a->solve_work; 3229 3230 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3231 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3232 3233 /* forward solve the lower triangular */ 3234 tmp[0] = b[r[0]]; 3235 v = aa; 3236 vi = aj; 3237 for (i=1; i<n; i++) { 3238 nz = ai[i+1] - ai[i]; 3239 sum = b[r[i]]; 3240 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3241 tmp[i] = sum; 3242 v += nz; vi += nz; 3243 } 3244 3245 /* backward solve the upper triangular */ 3246 for (i=n-1; i>=0; i--) { 3247 v = aa + adiag[i+1]+1; 3248 vi = aj + adiag[i+1]+1; 3249 nz = adiag[i]-adiag[i+1]-1; 3250 sum = tmp[i]; 3251 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3252 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3253 } 3254 3255 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3256 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3257 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3258 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3259 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3260 PetscFunctionReturn(0); 3261 } 3262 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3265 /* 3266 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3267 */ 3268 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3269 { 3270 Mat B = *fact; 3271 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3272 IS isicol; 3273 PetscErrorCode ierr; 3274 const PetscInt *r,*ic; 3275 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3276 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3277 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3278 PetscInt nlnk,*lnk; 3279 PetscBT lnkbt; 3280 PetscBool row_identity,icol_identity; 3281 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3282 const PetscInt *ics; 3283 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3284 PetscReal dt =info->dt,shift=info->shiftamount; 3285 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3286 PetscBool missing; 3287 3288 PetscFunctionBegin; 3289 if (dt == PETSC_DEFAULT) dt = 0.005; 3290 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3291 3292 /* ------- symbolic factorization, can be reused ---------*/ 3293 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3294 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3295 adiag=a->diag; 3296 3297 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3298 3299 /* bdiag is location of diagonal in factor */ 3300 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3301 ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3302 3303 /* allocate row pointers bi */ 3304 ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr); 3305 3306 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3307 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3308 nnz_max = ai[n]+2*n*dtcount+2; 3309 3310 ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr); 3311 ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr); 3312 3313 /* put together the new matrix */ 3314 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3316 b = (Mat_SeqAIJ*)B->data; 3317 3318 b->free_a = PETSC_TRUE; 3319 b->free_ij = PETSC_TRUE; 3320 b->singlemalloc = PETSC_FALSE; 3321 3322 b->a = ba; 3323 b->j = bj; 3324 b->i = bi; 3325 b->diag = bdiag; 3326 b->ilen = 0; 3327 b->imax = 0; 3328 b->row = isrow; 3329 b->col = iscol; 3330 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3331 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3332 b->icol = isicol; 3333 3334 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 3335 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3336 b->maxnz = nnz_max; 3337 3338 B->factortype = MAT_FACTOR_ILUDT; 3339 B->info.factor_mallocs = 0; 3340 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3341 /* ------- end of symbolic factorization ---------*/ 3342 3343 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3344 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3345 ics = ic; 3346 3347 /* linked list for storing column indices of the active row */ 3348 nlnk = n + 1; 3349 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3350 3351 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3352 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3353 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3354 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3355 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3356 3357 bi[0] = 0; 3358 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3359 bdiag_rev[n] = bdiag[0]; 3360 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3361 for (i=0; i<n; i++) { 3362 /* copy initial fill into linked list */ 3363 nzi = ai[r[i]+1] - ai[r[i]]; 3364 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3365 nzi_al = adiag[r[i]] - ai[r[i]]; 3366 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3367 ajtmp = aj + ai[r[i]]; 3368 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3369 3370 /* load in initial (unfactored row) */ 3371 aatmp = a->a + ai[r[i]]; 3372 for (j=0; j<nzi; j++) { 3373 rtmp[ics[*ajtmp++]] = *aatmp++; 3374 } 3375 3376 /* add pivot rows into linked list */ 3377 row = lnk[n]; 3378 while (row < i) { 3379 nzi_bl = bi[row+1] - bi[row] + 1; 3380 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3381 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3382 nzi += nlnk; 3383 row = lnk[row]; 3384 } 3385 3386 /* copy data from lnk into jtmp, then initialize lnk */ 3387 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3388 3389 /* numerical factorization */ 3390 bjtmp = jtmp; 3391 row = *bjtmp++; /* 1st pivot row */ 3392 while (row < i) { 3393 pc = rtmp + row; 3394 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3395 multiplier = (*pc) * (*pv); 3396 *pc = multiplier; 3397 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3398 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3399 pv = ba + bdiag[row+1] + 1; 3400 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3401 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3402 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3403 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3404 } 3405 row = *bjtmp++; 3406 } 3407 3408 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3409 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3410 nzi_bl = 0; j = 0; 3411 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3412 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3413 nzi_bl++; j++; 3414 } 3415 nzi_bu = nzi - nzi_bl -1; 3416 while (j < nzi) { 3417 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3418 j++; 3419 } 3420 3421 bjtmp = bj + bi[i]; 3422 batmp = ba + bi[i]; 3423 /* apply level dropping rule to L part */ 3424 ncut = nzi_al + dtcount; 3425 if (ncut < nzi_bl) { 3426 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3427 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3428 } else { 3429 ncut = nzi_bl; 3430 } 3431 for (j=0; j<ncut; j++) { 3432 bjtmp[j] = jtmp[j]; 3433 batmp[j] = vtmp[j]; 3434 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3435 } 3436 bi[i+1] = bi[i] + ncut; 3437 nzi = ncut + 1; 3438 3439 /* apply level dropping rule to U part */ 3440 ncut = nzi_au + dtcount; 3441 if (ncut < nzi_bu) { 3442 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3443 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3444 } else { 3445 ncut = nzi_bu; 3446 } 3447 nzi += ncut; 3448 3449 /* mark bdiagonal */ 3450 bdiag[i+1] = bdiag[i] - (ncut + 1); 3451 bdiag_rev[n-i-1] = bdiag[i+1]; 3452 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3453 bjtmp = bj + bdiag[i]; 3454 batmp = ba + bdiag[i]; 3455 *bjtmp = i; 3456 *batmp = diag_tmp; /* rtmp[i]; */ 3457 if (*batmp == 0.0) { 3458 *batmp = dt+shift; 3459 /* printf(" row %d add shift %g\n",i,shift); */ 3460 } 3461 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3462 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3463 3464 bjtmp = bj + bdiag[i+1]+1; 3465 batmp = ba + bdiag[i+1]+1; 3466 for (k=0; k<ncut; k++) { 3467 bjtmp[k] = jtmp[nzi_bl+1+k]; 3468 batmp[k] = vtmp[nzi_bl+1+k]; 3469 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3470 } 3471 /* printf("\n"); */ 3472 3473 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3474 /* 3475 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3476 printf(" ----------------------------\n"); 3477 */ 3478 } /* for (i=0; i<n; i++) */ 3479 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3480 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3481 3482 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3483 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3484 3485 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3486 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3487 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3488 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3489 3490 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3491 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3492 3493 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3494 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3495 if (row_identity && icol_identity) { 3496 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3497 } else { 3498 B->ops->solve = MatSolve_SeqAIJ; 3499 } 3500 3501 B->ops->solveadd = 0; 3502 B->ops->solvetranspose = 0; 3503 B->ops->solvetransposeadd = 0; 3504 B->ops->matsolve = 0; 3505 B->assembled = PETSC_TRUE; 3506 B->preallocated = PETSC_TRUE; 3507 PetscFunctionReturn(0); 3508 } 3509 3510 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3511 #undef __FUNCT__ 3512 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3513 /* 3514 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3515 */ 3516 3517 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3518 { 3519 PetscErrorCode ierr; 3520 3521 PetscFunctionBegin; 3522 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3523 PetscFunctionReturn(0); 3524 } 3525 3526 /* 3527 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3528 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3529 */ 3530 #undef __FUNCT__ 3531 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3532 /* 3533 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3534 */ 3535 3536 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3537 { 3538 Mat C =fact; 3539 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3540 IS isrow = b->row,isicol = b->icol; 3541 PetscErrorCode ierr; 3542 const PetscInt *r,*ic,*ics; 3543 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3544 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3545 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3546 PetscReal dt=info->dt,shift=info->shiftamount; 3547 PetscBool row_identity, col_identity; 3548 3549 PetscFunctionBegin; 3550 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3551 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3552 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 3553 ics = ic; 3554 3555 for (i=0; i<n; i++) { 3556 /* initialize rtmp array */ 3557 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3558 bjtmp = bj + bi[i]; 3559 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3560 rtmp[i] = 0.0; 3561 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3562 bjtmp = bj + bdiag[i+1] + 1; 3563 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3564 3565 /* load in initial unfactored row of A */ 3566 /* printf("row %d\n",i); */ 3567 nz = ai[r[i]+1] - ai[r[i]]; 3568 ajtmp = aj + ai[r[i]]; 3569 v = aa + ai[r[i]]; 3570 for (j=0; j<nz; j++) { 3571 rtmp[ics[*ajtmp++]] = v[j]; 3572 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3573 } 3574 /* printf("\n"); */ 3575 3576 /* numerical factorization */ 3577 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3578 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3579 k = 0; 3580 while (k < nzl) { 3581 row = *bjtmp++; 3582 /* printf(" prow %d\n",row); */ 3583 pc = rtmp + row; 3584 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3585 multiplier = (*pc) * (*pv); 3586 *pc = multiplier; 3587 if (PetscAbsScalar(multiplier) > dt) { 3588 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3589 pv = b->a + bdiag[row+1] + 1; 3590 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3591 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3592 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3593 } 3594 k++; 3595 } 3596 3597 /* finished row so stick it into b->a */ 3598 /* L-part */ 3599 pv = b->a + bi[i]; 3600 pj = bj + bi[i]; 3601 nzl = bi[i+1] - bi[i]; 3602 for (j=0; j<nzl; j++) { 3603 pv[j] = rtmp[pj[j]]; 3604 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3605 } 3606 3607 /* diagonal: invert diagonal entries for simplier triangular solves */ 3608 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3609 b->a[bdiag[i]] = 1.0/rtmp[i]; 3610 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3611 3612 /* U-part */ 3613 pv = b->a + bdiag[i+1] + 1; 3614 pj = bj + bdiag[i+1] + 1; 3615 nzu = bdiag[i] - bdiag[i+1] - 1; 3616 for (j=0; j<nzu; j++) { 3617 pv[j] = rtmp[pj[j]]; 3618 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3619 } 3620 /* printf("\n"); */ 3621 } 3622 3623 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3624 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3625 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3626 3627 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3628 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3629 if (row_identity && col_identity) { 3630 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3631 } else { 3632 C->ops->solve = MatSolve_SeqAIJ; 3633 } 3634 C->ops->solveadd = 0; 3635 C->ops->solvetranspose = 0; 3636 C->ops->solvetransposeadd = 0; 3637 C->ops->matsolve = 0; 3638 C->assembled = PETSC_TRUE; 3639 C->preallocated = PETSC_TRUE; 3640 3641 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3642 PetscFunctionReturn(0); 3643 } 3644