1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 15 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best,past = 0,future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i=0; i<n; i++) { 25 future = 0.0; 26 for (j=ai[i]; j<ai[i+1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past/future > best) { 32 best = past/future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n,&done)); 38 PetscCall(PetscArrayzero(done,n)); 39 PetscCall(PetscMalloc1(n,&order)); 40 order[0] = current; 41 for (i=0; i<n-1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j=ai[current]; j<ai[current+1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k=ai[jj]; k<ai[jj+1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past/future > best) { 57 best = past/future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k=0; k<n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j=ai[k]; j<ai[k+1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past/future > best) { 74 best = past/future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent,PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 80 current = newcurrent; 81 order[i+1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(0); 89 } 90 91 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type) 92 { 93 PetscFunctionBegin; 94 *type = MATSOLVERPETSC; 95 PetscFunctionReturn(0); 96 } 97 98 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 99 { 100 PetscInt n = A->rmap->n; 101 102 PetscFunctionBegin; 103 #if defined(PETSC_USE_COMPLEX) 104 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC),PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported"); 105 #endif 106 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B)); 107 PetscCall(MatSetSizes(*B,n,n,n,n)); 108 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 109 PetscCall(MatSetType(*B,MATSEQAIJ)); 110 111 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 112 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 113 114 PetscCall(MatSetBlockSizesFromMats(*B,A,A)); 115 PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU])); 116 PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU])); 117 PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 118 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 119 PetscCall(MatSetType(*B,MATSEQSBAIJ)); 120 PetscCall(MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL)); 121 122 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 123 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 124 PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 125 PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC])); 126 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 127 (*B)->factortype = ftype; 128 129 PetscCall(PetscFree((*B)->solvertype)); 130 PetscCall(PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype)); 131 (*B)->canuseordering = PETSC_TRUE; 132 PetscCall(PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc)); 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 139 IS isicol; 140 const PetscInt *r,*ic; 141 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 142 PetscInt *bi,*bj,*ajtmp; 143 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 144 PetscReal f; 145 PetscInt nlnk,*lnk,k,**bi_ptr; 146 PetscFreeSpaceList free_space=NULL,current_space=NULL; 147 PetscBT lnkbt; 148 PetscBool missing; 149 150 PetscFunctionBegin; 151 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 152 PetscCall(MatMissingDiagonal(A,&missing,&i)); 153 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 154 155 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 156 PetscCall(ISGetIndices(isrow,&r)); 157 PetscCall(ISGetIndices(isicol,&ic)); 158 159 /* get new row pointers */ 160 PetscCall(PetscMalloc1(n+1,&bi)); 161 bi[0] = 0; 162 163 /* bdiag is location of diagonal in factor */ 164 PetscCall(PetscMalloc1(n+1,&bdiag)); 165 bdiag[0] = 0; 166 167 /* linked list for storing column indices of the active row */ 168 nlnk = n + 1; 169 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 170 171 PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 172 173 /* initial FreeSpace size is f*(ai[n]+1) */ 174 f = info->fill; 175 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 176 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 177 current_space = free_space; 178 179 for (i=0; i<n; i++) { 180 /* copy previous fill into linked list */ 181 nzi = 0; 182 nnz = ai[r[i]+1] - ai[r[i]]; 183 ajtmp = aj + ai[r[i]]; 184 PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 185 nzi += nlnk; 186 187 /* add pivot rows into linked list */ 188 row = lnk[n]; 189 while (row < i) { 190 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 191 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 192 PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 193 nzi += nlnk; 194 row = lnk[row]; 195 } 196 bi[i+1] = bi[i] + nzi; 197 im[i] = nzi; 198 199 /* mark bdiag */ 200 nzbd = 0; 201 nnz = nzi; 202 k = lnk[n]; 203 while (nnz-- && k < i) { 204 nzbd++; 205 k = lnk[k]; 206 } 207 bdiag[i] = bi[i] + nzbd; 208 209 /* if free space is not available, make more free space */ 210 if (current_space->local_remaining<nzi) { 211 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 212 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 213 reallocs++; 214 } 215 216 /* copy data into free space, then initialize lnk */ 217 PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 218 219 bi_ptr[i] = current_space->array; 220 current_space->array += nzi; 221 current_space->local_used += nzi; 222 current_space->local_remaining -= nzi; 223 } 224 #if defined(PETSC_USE_INFO) 225 if (ai[n] != 0) { 226 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 227 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 228 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 229 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 230 PetscCall(PetscInfo(A,"for best performance.\n")); 231 } else { 232 PetscCall(PetscInfo(A,"Empty matrix\n")); 233 } 234 #endif 235 236 PetscCall(ISRestoreIndices(isrow,&r)); 237 PetscCall(ISRestoreIndices(isicol,&ic)); 238 239 /* destroy list of free space and other temporary array(s) */ 240 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 241 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 242 PetscCall(PetscLLDestroy(lnk,lnkbt)); 243 PetscCall(PetscFree2(bi_ptr,im)); 244 245 /* put together the new matrix */ 246 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL)); 247 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 248 b = (Mat_SeqAIJ*)(B)->data; 249 250 b->free_a = PETSC_TRUE; 251 b->free_ij = PETSC_TRUE; 252 b->singlemalloc = PETSC_FALSE; 253 254 PetscCall(PetscMalloc1(bi[n]+1,&b->a)); 255 b->j = bj; 256 b->i = bi; 257 b->diag = bdiag; 258 b->ilen = NULL; 259 b->imax = NULL; 260 b->row = isrow; 261 b->col = iscol; 262 PetscCall(PetscObjectReference((PetscObject)isrow)); 263 PetscCall(PetscObjectReference((PetscObject)iscol)); 264 b->icol = isicol; 265 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 266 267 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 268 PetscCall(PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)))); 269 b->maxnz = b->nz = bi[n]; 270 271 (B)->factortype = MAT_FACTOR_LU; 272 (B)->info.factor_mallocs = reallocs; 273 (B)->info.fill_ratio_given = f; 274 275 if (ai[n]) { 276 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 277 } else { 278 (B)->info.fill_ratio_needed = 0.0; 279 } 280 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 281 if (a->inode.size) { 282 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 283 } 284 PetscFunctionReturn(0); 285 } 286 287 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 288 { 289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 290 IS isicol; 291 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 292 PetscInt i,n=A->rmap->n; 293 PetscInt *bi,*bj; 294 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295 PetscReal f; 296 PetscInt nlnk,*lnk,k,**bi_ptr; 297 PetscFreeSpaceList free_space=NULL,current_space=NULL; 298 PetscBT lnkbt; 299 PetscBool missing; 300 301 PetscFunctionBegin; 302 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 303 PetscCall(MatMissingDiagonal(A,&missing,&i)); 304 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 305 306 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 307 PetscCall(ISGetIndices(isrow,&r)); 308 PetscCall(ISGetIndices(isicol,&ic)); 309 310 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 311 PetscCall(PetscMalloc1(n+1,&bi)); 312 PetscCall(PetscMalloc1(n+1,&bdiag)); 313 bi[0] = bdiag[0] = 0; 314 315 /* linked list for storing column indices of the active row */ 316 nlnk = n + 1; 317 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 318 319 PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 320 321 /* initial FreeSpace size is f*(ai[n]+1) */ 322 f = info->fill; 323 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 324 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 325 current_space = free_space; 326 327 for (i=0; i<n; i++) { 328 /* copy previous fill into linked list */ 329 nzi = 0; 330 nnz = ai[r[i]+1] - ai[r[i]]; 331 ajtmp = aj + ai[r[i]]; 332 PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 333 nzi += nlnk; 334 335 /* add pivot rows into linked list */ 336 row = lnk[n]; 337 while (row < i) { 338 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 339 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 340 PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 341 nzi += nlnk; 342 row = lnk[row]; 343 } 344 bi[i+1] = bi[i] + nzi; 345 im[i] = nzi; 346 347 /* mark bdiag */ 348 nzbd = 0; 349 nnz = nzi; 350 k = lnk[n]; 351 while (nnz-- && k < i) { 352 nzbd++; 353 k = lnk[k]; 354 } 355 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 356 357 /* if free space is not available, make more free space */ 358 if (current_space->local_remaining<nzi) { 359 /* estimated additional space needed */ 360 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 361 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 362 reallocs++; 363 } 364 365 /* copy data into free space, then initialize lnk */ 366 PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 367 368 bi_ptr[i] = current_space->array; 369 current_space->array += nzi; 370 current_space->local_used += nzi; 371 current_space->local_remaining -= nzi; 372 } 373 374 PetscCall(ISRestoreIndices(isrow,&r)); 375 PetscCall(ISRestoreIndices(isicol,&ic)); 376 377 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 378 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 379 PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag)); 380 PetscCall(PetscLLDestroy(lnk,lnkbt)); 381 PetscCall(PetscFree2(bi_ptr,im)); 382 383 /* put together the new matrix */ 384 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL)); 385 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 386 b = (Mat_SeqAIJ*)(B)->data; 387 388 b->free_a = PETSC_TRUE; 389 b->free_ij = PETSC_TRUE; 390 b->singlemalloc = PETSC_FALSE; 391 392 PetscCall(PetscMalloc1(bdiag[0]+1,&b->a)); 393 394 b->j = bj; 395 b->i = bi; 396 b->diag = bdiag; 397 b->ilen = NULL; 398 b->imax = NULL; 399 b->row = isrow; 400 b->col = iscol; 401 PetscCall(PetscObjectReference((PetscObject)isrow)); 402 PetscCall(PetscObjectReference((PetscObject)iscol)); 403 b->icol = isicol; 404 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 405 406 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 407 PetscCall(PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)))); 408 b->maxnz = b->nz = bdiag[0]+1; 409 410 B->factortype = MAT_FACTOR_LU; 411 B->info.factor_mallocs = reallocs; 412 B->info.fill_ratio_given = f; 413 414 if (ai[n]) { 415 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 416 } else { 417 B->info.fill_ratio_needed = 0.0; 418 } 419 #if defined(PETSC_USE_INFO) 420 if (ai[n] != 0) { 421 PetscReal af = B->info.fill_ratio_needed; 422 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 423 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 424 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 425 PetscCall(PetscInfo(A,"for best performance.\n")); 426 } else { 427 PetscCall(PetscInfo(A,"Empty matrix\n")); 428 } 429 #endif 430 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 431 if (a->inode.size) { 432 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 433 } 434 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 435 PetscFunctionReturn(0); 436 } 437 438 /* 439 Trouble in factorization, should we dump the original matrix? 440 */ 441 PetscErrorCode MatFactorDumpMatrix(Mat A) 442 { 443 PetscBool flg = PETSC_FALSE; 444 445 PetscFunctionBegin; 446 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL)); 447 if (flg) { 448 PetscViewer viewer; 449 char filename[PETSC_MAX_PATH_LEN]; 450 451 PetscCall(PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank)); 452 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer)); 453 PetscCall(MatView(A,viewer)); 454 PetscCall(PetscViewerDestroy(&viewer)); 455 } 456 PetscFunctionReturn(0); 457 } 458 459 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 460 { 461 Mat C =B; 462 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 463 IS isrow = b->row,isicol = b->icol; 464 const PetscInt *r,*ic,*ics; 465 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 466 PetscInt i,j,k,nz,nzL,row,*pj; 467 const PetscInt *ajtmp,*bjtmp; 468 MatScalar *rtmp,*pc,multiplier,*pv; 469 const MatScalar *aa=a->a,*v; 470 PetscBool row_identity,col_identity; 471 FactorShiftCtx sctx; 472 const PetscInt *ddiag; 473 PetscReal rs; 474 MatScalar d; 475 476 PetscFunctionBegin; 477 /* MatPivotSetUp(): initialize shift context sctx */ 478 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 479 480 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 481 ddiag = a->diag; 482 sctx.shift_top = info->zeropivot; 483 for (i=0; i<n; i++) { 484 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 485 d = (aa)[ddiag[i]]; 486 rs = -PetscAbsScalar(d) - PetscRealPart(d); 487 v = aa+ai[i]; 488 nz = ai[i+1] - ai[i]; 489 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 490 if (rs>sctx.shift_top) sctx.shift_top = rs; 491 } 492 sctx.shift_top *= 1.1; 493 sctx.nshift_max = 5; 494 sctx.shift_lo = 0.; 495 sctx.shift_hi = 1.; 496 } 497 498 PetscCall(ISGetIndices(isrow,&r)); 499 PetscCall(ISGetIndices(isicol,&ic)); 500 PetscCall(PetscMalloc1(n+1,&rtmp)); 501 ics = ic; 502 503 do { 504 sctx.newshift = PETSC_FALSE; 505 for (i=0; i<n; i++) { 506 /* zero rtmp */ 507 /* L part */ 508 nz = bi[i+1] - bi[i]; 509 bjtmp = bj + bi[i]; 510 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 511 512 /* U part */ 513 nz = bdiag[i]-bdiag[i+1]; 514 bjtmp = bj + bdiag[i+1]+1; 515 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 516 517 /* load in initial (unfactored row) */ 518 nz = ai[r[i]+1] - ai[r[i]]; 519 ajtmp = aj + ai[r[i]]; 520 v = aa + ai[r[i]]; 521 for (j=0; j<nz; j++) { 522 rtmp[ics[ajtmp[j]]] = v[j]; 523 } 524 /* ZeropivotApply() */ 525 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 526 527 /* elimination */ 528 bjtmp = bj + bi[i]; 529 row = *bjtmp++; 530 nzL = bi[i+1] - bi[i]; 531 for (k=0; k < nzL; k++) { 532 pc = rtmp + row; 533 if (*pc != 0.0) { 534 pv = b->a + bdiag[row]; 535 multiplier = *pc * (*pv); 536 *pc = multiplier; 537 538 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 539 pv = b->a + bdiag[row+1]+1; 540 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 541 542 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 543 PetscCall(PetscLogFlops(1+2.0*nz)); 544 } 545 row = *bjtmp++; 546 } 547 548 /* finished row so stick it into b->a */ 549 rs = 0.0; 550 /* L part */ 551 pv = b->a + bi[i]; 552 pj = b->j + bi[i]; 553 nz = bi[i+1] - bi[i]; 554 for (j=0; j<nz; j++) { 555 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 556 } 557 558 /* U part */ 559 pv = b->a + bdiag[i+1]+1; 560 pj = b->j + bdiag[i+1]+1; 561 nz = bdiag[i] - bdiag[i+1]-1; 562 for (j=0; j<nz; j++) { 563 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 564 } 565 566 sctx.rs = rs; 567 sctx.pv = rtmp[i]; 568 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 569 if (sctx.newshift) break; /* break for-loop */ 570 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 571 572 /* Mark diagonal and invert diagonal for simpler triangular solves */ 573 pv = b->a + bdiag[i]; 574 *pv = 1.0/rtmp[i]; 575 576 } /* endof for (i=0; i<n; i++) { */ 577 578 /* MatPivotRefine() */ 579 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 580 /* 581 * if no shift in this attempt & shifting & started shifting & can refine, 582 * then try lower shift 583 */ 584 sctx.shift_hi = sctx.shift_fraction; 585 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 586 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 587 sctx.newshift = PETSC_TRUE; 588 sctx.nshift++; 589 } 590 } while (sctx.newshift); 591 592 PetscCall(PetscFree(rtmp)); 593 PetscCall(ISRestoreIndices(isicol,&ic)); 594 PetscCall(ISRestoreIndices(isrow,&r)); 595 596 PetscCall(ISIdentity(isrow,&row_identity)); 597 PetscCall(ISIdentity(isicol,&col_identity)); 598 if (b->inode.size) { 599 C->ops->solve = MatSolve_SeqAIJ_Inode; 600 } else if (row_identity && col_identity) { 601 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 602 } else { 603 C->ops->solve = MatSolve_SeqAIJ; 604 } 605 C->ops->solveadd = MatSolveAdd_SeqAIJ; 606 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 607 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 608 C->ops->matsolve = MatMatSolve_SeqAIJ; 609 C->assembled = PETSC_TRUE; 610 C->preallocated = PETSC_TRUE; 611 612 PetscCall(PetscLogFlops(C->cmap->n)); 613 614 /* MatShiftView(A,info,&sctx) */ 615 if (sctx.nshift) { 616 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 617 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 618 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 619 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 620 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 621 PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount)); 622 } 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 628 { 629 Mat C =B; 630 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 631 IS isrow = b->row,isicol = b->icol; 632 const PetscInt *r,*ic,*ics; 633 PetscInt nz,row,i,j,n=A->rmap->n,diag; 634 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 635 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 636 MatScalar *pv,*rtmp,*pc,multiplier,d; 637 const MatScalar *v,*aa=a->a; 638 PetscReal rs=0.0; 639 FactorShiftCtx sctx; 640 const PetscInt *ddiag; 641 PetscBool row_identity, col_identity; 642 643 PetscFunctionBegin; 644 /* MatPivotSetUp(): initialize shift context sctx */ 645 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 646 647 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 648 ddiag = a->diag; 649 sctx.shift_top = info->zeropivot; 650 for (i=0; i<n; i++) { 651 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 652 d = (aa)[ddiag[i]]; 653 rs = -PetscAbsScalar(d) - PetscRealPart(d); 654 v = aa+ai[i]; 655 nz = ai[i+1] - ai[i]; 656 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 657 if (rs>sctx.shift_top) sctx.shift_top = rs; 658 } 659 sctx.shift_top *= 1.1; 660 sctx.nshift_max = 5; 661 sctx.shift_lo = 0.; 662 sctx.shift_hi = 1.; 663 } 664 665 PetscCall(ISGetIndices(isrow,&r)); 666 PetscCall(ISGetIndices(isicol,&ic)); 667 PetscCall(PetscMalloc1(n+1,&rtmp)); 668 ics = ic; 669 670 do { 671 sctx.newshift = PETSC_FALSE; 672 for (i=0; i<n; i++) { 673 nz = bi[i+1] - bi[i]; 674 bjtmp = bj + bi[i]; 675 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 676 677 /* load in initial (unfactored row) */ 678 nz = ai[r[i]+1] - ai[r[i]]; 679 ajtmp = aj + ai[r[i]]; 680 v = aa + ai[r[i]]; 681 for (j=0; j<nz; j++) { 682 rtmp[ics[ajtmp[j]]] = v[j]; 683 } 684 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 685 686 row = *bjtmp++; 687 while (row < i) { 688 pc = rtmp + row; 689 if (*pc != 0.0) { 690 pv = b->a + diag_offset[row]; 691 pj = b->j + diag_offset[row] + 1; 692 multiplier = *pc / *pv++; 693 *pc = multiplier; 694 nz = bi[row+1] - diag_offset[row] - 1; 695 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 696 PetscCall(PetscLogFlops(1+2.0*nz)); 697 } 698 row = *bjtmp++; 699 } 700 /* finished row so stick it into b->a */ 701 pv = b->a + bi[i]; 702 pj = b->j + bi[i]; 703 nz = bi[i+1] - bi[i]; 704 diag = diag_offset[i] - bi[i]; 705 rs = 0.0; 706 for (j=0; j<nz; j++) { 707 pv[j] = rtmp[pj[j]]; 708 rs += PetscAbsScalar(pv[j]); 709 } 710 rs -= PetscAbsScalar(pv[diag]); 711 712 sctx.rs = rs; 713 sctx.pv = pv[diag]; 714 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 715 if (sctx.newshift) break; 716 pv[diag] = sctx.pv; 717 } 718 719 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 720 /* 721 * if no shift in this attempt & shifting & started shifting & can refine, 722 * then try lower shift 723 */ 724 sctx.shift_hi = sctx.shift_fraction; 725 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 726 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 727 sctx.newshift = PETSC_TRUE; 728 sctx.nshift++; 729 } 730 } while (sctx.newshift); 731 732 /* invert diagonal entries for simpler triangular solves */ 733 for (i=0; i<n; i++) { 734 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 735 } 736 PetscCall(PetscFree(rtmp)); 737 PetscCall(ISRestoreIndices(isicol,&ic)); 738 PetscCall(ISRestoreIndices(isrow,&r)); 739 740 PetscCall(ISIdentity(isrow,&row_identity)); 741 PetscCall(ISIdentity(isicol,&col_identity)); 742 if (row_identity && col_identity) { 743 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 744 } else { 745 C->ops->solve = MatSolve_SeqAIJ_inplace; 746 } 747 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 748 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 749 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 750 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 751 752 C->assembled = PETSC_TRUE; 753 C->preallocated = PETSC_TRUE; 754 755 PetscCall(PetscLogFlops(C->cmap->n)); 756 if (sctx.nshift) { 757 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 758 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 759 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 760 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 761 } 762 } 763 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 764 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 765 766 PetscCall(MatSeqAIJCheckInode(C)); 767 PetscFunctionReturn(0); 768 } 769 770 /* 771 This routine implements inplace ILU(0) with row or/and column permutations. 772 Input: 773 A - original matrix 774 Output; 775 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 776 a->j (col index) is permuted by the inverse of colperm, then sorted 777 a->a reordered accordingly with a->j 778 a->diag (ptr to diagonal elements) is updated. 779 */ 780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 781 { 782 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 783 IS isrow = a->row,isicol = a->icol; 784 const PetscInt *r,*ic,*ics; 785 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 786 PetscInt *ajtmp,nz,row; 787 PetscInt *diag = a->diag,nbdiag,*pj; 788 PetscScalar *rtmp,*pc,multiplier,d; 789 MatScalar *pv,*v; 790 PetscReal rs; 791 FactorShiftCtx sctx; 792 const MatScalar *aa=a->a,*vtmp; 793 794 PetscFunctionBegin; 795 PetscCheck(A == B,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 796 797 /* MatPivotSetUp(): initialize shift context sctx */ 798 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 799 800 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 801 const PetscInt *ddiag = a->diag; 802 sctx.shift_top = info->zeropivot; 803 for (i=0; i<n; i++) { 804 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 805 d = (aa)[ddiag[i]]; 806 rs = -PetscAbsScalar(d) - PetscRealPart(d); 807 vtmp = aa+ai[i]; 808 nz = ai[i+1] - ai[i]; 809 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 810 if (rs>sctx.shift_top) sctx.shift_top = rs; 811 } 812 sctx.shift_top *= 1.1; 813 sctx.nshift_max = 5; 814 sctx.shift_lo = 0.; 815 sctx.shift_hi = 1.; 816 } 817 818 PetscCall(ISGetIndices(isrow,&r)); 819 PetscCall(ISGetIndices(isicol,&ic)); 820 PetscCall(PetscMalloc1(n+1,&rtmp)); 821 PetscCall(PetscArrayzero(rtmp,n+1)); 822 ics = ic; 823 824 #if defined(MV) 825 sctx.shift_top = 0.; 826 sctx.nshift_max = 0; 827 sctx.shift_lo = 0.; 828 sctx.shift_hi = 0.; 829 sctx.shift_fraction = 0.; 830 831 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 832 sctx.shift_top = 0.; 833 for (i=0; i<n; i++) { 834 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 835 d = (a->a)[diag[i]]; 836 rs = -PetscAbsScalar(d) - PetscRealPart(d); 837 v = a->a+ai[i]; 838 nz = ai[i+1] - ai[i]; 839 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 840 if (rs>sctx.shift_top) sctx.shift_top = rs; 841 } 842 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 843 sctx.shift_top *= 1.1; 844 sctx.nshift_max = 5; 845 sctx.shift_lo = 0.; 846 sctx.shift_hi = 1.; 847 } 848 849 sctx.shift_amount = 0.; 850 sctx.nshift = 0; 851 #endif 852 853 do { 854 sctx.newshift = PETSC_FALSE; 855 for (i=0; i<n; i++) { 856 /* load in initial unfactored row */ 857 nz = ai[r[i]+1] - ai[r[i]]; 858 ajtmp = aj + ai[r[i]]; 859 v = a->a + ai[r[i]]; 860 /* sort permuted ajtmp and values v accordingly */ 861 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 862 PetscCall(PetscSortIntWithScalarArray(nz,ajtmp,v)); 863 864 diag[r[i]] = ai[r[i]]; 865 for (j=0; j<nz; j++) { 866 rtmp[ajtmp[j]] = v[j]; 867 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 868 } 869 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 870 871 row = *ajtmp++; 872 while (row < i) { 873 pc = rtmp + row; 874 if (*pc != 0.0) { 875 pv = a->a + diag[r[row]]; 876 pj = aj + diag[r[row]] + 1; 877 878 multiplier = *pc / *pv++; 879 *pc = multiplier; 880 nz = ai[r[row]+1] - diag[r[row]] - 1; 881 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 882 PetscCall(PetscLogFlops(1+2.0*nz)); 883 } 884 row = *ajtmp++; 885 } 886 /* finished row so overwrite it onto a->a */ 887 pv = a->a + ai[r[i]]; 888 pj = aj + ai[r[i]]; 889 nz = ai[r[i]+1] - ai[r[i]]; 890 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 891 892 rs = 0.0; 893 for (j=0; j<nz; j++) { 894 pv[j] = rtmp[pj[j]]; 895 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 896 } 897 898 sctx.rs = rs; 899 sctx.pv = pv[nbdiag]; 900 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 901 if (sctx.newshift) break; 902 pv[nbdiag] = sctx.pv; 903 } 904 905 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 906 /* 907 * if no shift in this attempt & shifting & started shifting & can refine, 908 * then try lower shift 909 */ 910 sctx.shift_hi = sctx.shift_fraction; 911 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 912 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 913 sctx.newshift = PETSC_TRUE; 914 sctx.nshift++; 915 } 916 } while (sctx.newshift); 917 918 /* invert diagonal entries for simpler triangular solves */ 919 for (i=0; i<n; i++) { 920 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 921 } 922 923 PetscCall(PetscFree(rtmp)); 924 PetscCall(ISRestoreIndices(isicol,&ic)); 925 PetscCall(ISRestoreIndices(isrow,&r)); 926 927 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 928 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 929 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 930 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 931 932 A->assembled = PETSC_TRUE; 933 A->preallocated = PETSC_TRUE; 934 935 PetscCall(PetscLogFlops(A->cmap->n)); 936 if (sctx.nshift) { 937 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 938 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 939 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 940 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 941 } 942 } 943 PetscFunctionReturn(0); 944 } 945 946 /* ----------------------------------------------------------- */ 947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 948 { 949 Mat C; 950 951 PetscFunctionBegin; 952 PetscCall(MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C)); 953 PetscCall(MatLUFactorSymbolic(C,A,row,col,info)); 954 PetscCall(MatLUFactorNumeric(C,A,info)); 955 956 A->ops->solve = C->ops->solve; 957 A->ops->solvetranspose = C->ops->solvetranspose; 958 959 PetscCall(MatHeaderMerge(A,&C)); 960 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol)); 961 PetscFunctionReturn(0); 962 } 963 /* ----------------------------------------------------------- */ 964 965 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 966 { 967 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 968 IS iscol = a->col,isrow = a->row; 969 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 970 PetscInt nz; 971 const PetscInt *rout,*cout,*r,*c; 972 PetscScalar *x,*tmp,*tmps,sum; 973 const PetscScalar *b; 974 const MatScalar *aa = a->a,*v; 975 976 PetscFunctionBegin; 977 if (!n) PetscFunctionReturn(0); 978 979 PetscCall(VecGetArrayRead(bb,&b)); 980 PetscCall(VecGetArrayWrite(xx,&x)); 981 tmp = a->solve_work; 982 983 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 984 PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1); 985 986 /* forward solve the lower triangular */ 987 tmp[0] = b[*r++]; 988 tmps = tmp; 989 for (i=1; i<n; i++) { 990 v = aa + ai[i]; 991 vi = aj + ai[i]; 992 nz = a->diag[i] - ai[i]; 993 sum = b[*r++]; 994 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 995 tmp[i] = sum; 996 } 997 998 /* backward solve the upper triangular */ 999 for (i=n-1; i>=0; i--) { 1000 v = aa + a->diag[i] + 1; 1001 vi = aj + a->diag[i] + 1; 1002 nz = ai[i+1] - a->diag[i] - 1; 1003 sum = tmp[i]; 1004 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1005 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1006 } 1007 1008 PetscCall(ISRestoreIndices(isrow,&rout)); 1009 PetscCall(ISRestoreIndices(iscol,&cout)); 1010 PetscCall(VecRestoreArrayRead(bb,&b)); 1011 PetscCall(VecRestoreArrayWrite(xx,&x)); 1012 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1017 { 1018 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1019 IS iscol = a->col,isrow = a->row; 1020 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1021 PetscInt nz,neq,ldb,ldx; 1022 const PetscInt *rout,*cout,*r,*c; 1023 PetscScalar *x,*tmp = a->solve_work,*tmps,sum; 1024 const PetscScalar *b,*aa = a->a,*v; 1025 PetscBool isdense; 1026 1027 PetscFunctionBegin; 1028 if (!n) PetscFunctionReturn(0); 1029 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense)); 1030 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1031 if (X != B) { 1032 PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense)); 1033 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1034 } 1035 PetscCall(MatDenseGetArrayRead(B,&b)); 1036 PetscCall(MatDenseGetLDA(B,&ldb)); 1037 PetscCall(MatDenseGetArray(X,&x)); 1038 PetscCall(MatDenseGetLDA(X,&ldx)); 1039 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1040 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1041 for (neq=0; neq<B->cmap->n; neq++) { 1042 /* forward solve the lower triangular */ 1043 tmp[0] = b[r[0]]; 1044 tmps = tmp; 1045 for (i=1; i<n; i++) { 1046 v = aa + ai[i]; 1047 vi = aj + ai[i]; 1048 nz = a->diag[i] - ai[i]; 1049 sum = b[r[i]]; 1050 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1051 tmp[i] = sum; 1052 } 1053 /* backward solve the upper triangular */ 1054 for (i=n-1; i>=0; i--) { 1055 v = aa + a->diag[i] + 1; 1056 vi = aj + a->diag[i] + 1; 1057 nz = ai[i+1] - a->diag[i] - 1; 1058 sum = tmp[i]; 1059 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1060 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1061 } 1062 b += ldb; 1063 x += ldx; 1064 } 1065 PetscCall(ISRestoreIndices(isrow,&rout)); 1066 PetscCall(ISRestoreIndices(iscol,&cout)); 1067 PetscCall(MatDenseRestoreArrayRead(B,&b)); 1068 PetscCall(MatDenseRestoreArray(X,&x)); 1069 PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n))); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1074 { 1075 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1076 IS iscol = a->col,isrow = a->row; 1077 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1078 PetscInt nz,neq,ldb,ldx; 1079 const PetscInt *rout,*cout,*r,*c; 1080 PetscScalar *x,*tmp = a->solve_work,sum; 1081 const PetscScalar *b,*aa = a->a,*v; 1082 PetscBool isdense; 1083 1084 PetscFunctionBegin; 1085 if (!n) PetscFunctionReturn(0); 1086 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense)); 1087 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1088 if (X != B) { 1089 PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense)); 1090 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1091 } 1092 PetscCall(MatDenseGetArrayRead(B,&b)); 1093 PetscCall(MatDenseGetLDA(B,&ldb)); 1094 PetscCall(MatDenseGetArray(X,&x)); 1095 PetscCall(MatDenseGetLDA(X,&ldx)); 1096 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1097 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1098 for (neq=0; neq<B->cmap->n; neq++) { 1099 /* forward solve the lower triangular */ 1100 tmp[0] = b[r[0]]; 1101 v = aa; 1102 vi = aj; 1103 for (i=1; i<n; i++) { 1104 nz = ai[i+1] - ai[i]; 1105 sum = b[r[i]]; 1106 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1107 tmp[i] = sum; 1108 v += nz; vi += nz; 1109 } 1110 /* backward solve the upper triangular */ 1111 for (i=n-1; i>=0; i--) { 1112 v = aa + adiag[i+1]+1; 1113 vi = aj + adiag[i+1]+1; 1114 nz = adiag[i]-adiag[i+1]-1; 1115 sum = tmp[i]; 1116 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1117 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1118 } 1119 b += ldb; 1120 x += ldx; 1121 } 1122 PetscCall(ISRestoreIndices(isrow,&rout)); 1123 PetscCall(ISRestoreIndices(iscol,&cout)); 1124 PetscCall(MatDenseRestoreArrayRead(B,&b)); 1125 PetscCall(MatDenseRestoreArray(X,&x)); 1126 PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n))); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1131 { 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1133 IS iscol = a->col,isrow = a->row; 1134 const PetscInt *r,*c,*rout,*cout; 1135 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1136 PetscInt nz,row; 1137 PetscScalar *x,*tmp,*tmps,sum; 1138 const PetscScalar *b; 1139 const MatScalar *aa = a->a,*v; 1140 1141 PetscFunctionBegin; 1142 if (!n) PetscFunctionReturn(0); 1143 1144 PetscCall(VecGetArrayRead(bb,&b)); 1145 PetscCall(VecGetArrayWrite(xx,&x)); 1146 tmp = a->solve_work; 1147 1148 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1149 PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1); 1150 1151 /* forward solve the lower triangular */ 1152 tmp[0] = b[*r++]; 1153 tmps = tmp; 1154 for (row=1; row<n; row++) { 1155 i = rout[row]; /* permuted row */ 1156 v = aa + ai[i]; 1157 vi = aj + ai[i]; 1158 nz = a->diag[i] - ai[i]; 1159 sum = b[*r++]; 1160 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1161 tmp[row] = sum; 1162 } 1163 1164 /* backward solve the upper triangular */ 1165 for (row=n-1; row>=0; row--) { 1166 i = rout[row]; /* permuted row */ 1167 v = aa + a->diag[i] + 1; 1168 vi = aj + a->diag[i] + 1; 1169 nz = ai[i+1] - a->diag[i] - 1; 1170 sum = tmp[row]; 1171 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1172 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1173 } 1174 1175 PetscCall(ISRestoreIndices(isrow,&rout)); 1176 PetscCall(ISRestoreIndices(iscol,&cout)); 1177 PetscCall(VecRestoreArrayRead(bb,&b)); 1178 PetscCall(VecRestoreArrayWrite(xx,&x)); 1179 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 /* ----------------------------------------------------------- */ 1184 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1185 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1186 { 1187 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1188 PetscInt n = A->rmap->n; 1189 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1190 PetscScalar *x; 1191 const PetscScalar *b; 1192 const MatScalar *aa = a->a; 1193 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1194 PetscInt adiag_i,i,nz,ai_i; 1195 const PetscInt *vi; 1196 const MatScalar *v; 1197 PetscScalar sum; 1198 #endif 1199 1200 PetscFunctionBegin; 1201 if (!n) PetscFunctionReturn(0); 1202 1203 PetscCall(VecGetArrayRead(bb,&b)); 1204 PetscCall(VecGetArrayWrite(xx,&x)); 1205 1206 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1207 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1208 #else 1209 /* forward solve the lower triangular */ 1210 x[0] = b[0]; 1211 for (i=1; i<n; i++) { 1212 ai_i = ai[i]; 1213 v = aa + ai_i; 1214 vi = aj + ai_i; 1215 nz = adiag[i] - ai_i; 1216 sum = b[i]; 1217 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1218 x[i] = sum; 1219 } 1220 1221 /* backward solve the upper triangular */ 1222 for (i=n-1; i>=0; i--) { 1223 adiag_i = adiag[i]; 1224 v = aa + adiag_i + 1; 1225 vi = aj + adiag_i + 1; 1226 nz = ai[i+1] - adiag_i - 1; 1227 sum = x[i]; 1228 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1229 x[i] = sum*aa[adiag_i]; 1230 } 1231 #endif 1232 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 1233 PetscCall(VecRestoreArrayRead(bb,&b)); 1234 PetscCall(VecRestoreArrayWrite(xx,&x)); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1239 { 1240 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1241 IS iscol = a->col,isrow = a->row; 1242 PetscInt i, n = A->rmap->n,j; 1243 PetscInt nz; 1244 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1245 PetscScalar *x,*tmp,sum; 1246 const PetscScalar *b; 1247 const MatScalar *aa = a->a,*v; 1248 1249 PetscFunctionBegin; 1250 if (yy != xx) PetscCall(VecCopy(yy,xx)); 1251 1252 PetscCall(VecGetArrayRead(bb,&b)); 1253 PetscCall(VecGetArray(xx,&x)); 1254 tmp = a->solve_work; 1255 1256 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1257 PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1); 1258 1259 /* forward solve the lower triangular */ 1260 tmp[0] = b[*r++]; 1261 for (i=1; i<n; i++) { 1262 v = aa + ai[i]; 1263 vi = aj + ai[i]; 1264 nz = a->diag[i] - ai[i]; 1265 sum = b[*r++]; 1266 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1267 tmp[i] = sum; 1268 } 1269 1270 /* backward solve the upper triangular */ 1271 for (i=n-1; i>=0; i--) { 1272 v = aa + a->diag[i] + 1; 1273 vi = aj + a->diag[i] + 1; 1274 nz = ai[i+1] - a->diag[i] - 1; 1275 sum = tmp[i]; 1276 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1277 tmp[i] = sum*aa[a->diag[i]]; 1278 x[*c--] += tmp[i]; 1279 } 1280 1281 PetscCall(ISRestoreIndices(isrow,&rout)); 1282 PetscCall(ISRestoreIndices(iscol,&cout)); 1283 PetscCall(VecRestoreArrayRead(bb,&b)); 1284 PetscCall(VecRestoreArray(xx,&x)); 1285 PetscCall(PetscLogFlops(2.0*a->nz)); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1290 { 1291 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1292 IS iscol = a->col,isrow = a->row; 1293 PetscInt i, n = A->rmap->n,j; 1294 PetscInt nz; 1295 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1296 PetscScalar *x,*tmp,sum; 1297 const PetscScalar *b; 1298 const MatScalar *aa = a->a,*v; 1299 1300 PetscFunctionBegin; 1301 if (yy != xx) PetscCall(VecCopy(yy,xx)); 1302 1303 PetscCall(VecGetArrayRead(bb,&b)); 1304 PetscCall(VecGetArray(xx,&x)); 1305 tmp = a->solve_work; 1306 1307 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1308 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1309 1310 /* forward solve the lower triangular */ 1311 tmp[0] = b[r[0]]; 1312 v = aa; 1313 vi = aj; 1314 for (i=1; i<n; i++) { 1315 nz = ai[i+1] - ai[i]; 1316 sum = b[r[i]]; 1317 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1318 tmp[i] = sum; 1319 v += nz; 1320 vi += nz; 1321 } 1322 1323 /* backward solve the upper triangular */ 1324 v = aa + adiag[n-1]; 1325 vi = aj + adiag[n-1]; 1326 for (i=n-1; i>=0; i--) { 1327 nz = adiag[i] - adiag[i+1] - 1; 1328 sum = tmp[i]; 1329 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1330 tmp[i] = sum*v[nz]; 1331 x[c[i]] += tmp[i]; 1332 v += nz+1; vi += nz+1; 1333 } 1334 1335 PetscCall(ISRestoreIndices(isrow,&rout)); 1336 PetscCall(ISRestoreIndices(iscol,&cout)); 1337 PetscCall(VecRestoreArrayRead(bb,&b)); 1338 PetscCall(VecRestoreArray(xx,&x)); 1339 PetscCall(PetscLogFlops(2.0*a->nz)); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1344 { 1345 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1346 IS iscol = a->col,isrow = a->row; 1347 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1348 PetscInt i,n = A->rmap->n,j; 1349 PetscInt nz; 1350 PetscScalar *x,*tmp,s1; 1351 const MatScalar *aa = a->a,*v; 1352 const PetscScalar *b; 1353 1354 PetscFunctionBegin; 1355 PetscCall(VecGetArrayRead(bb,&b)); 1356 PetscCall(VecGetArrayWrite(xx,&x)); 1357 tmp = a->solve_work; 1358 1359 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1360 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1361 1362 /* copy the b into temp work space according to permutation */ 1363 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1364 1365 /* forward solve the U^T */ 1366 for (i=0; i<n; i++) { 1367 v = aa + diag[i]; 1368 vi = aj + diag[i] + 1; 1369 nz = ai[i+1] - diag[i] - 1; 1370 s1 = tmp[i]; 1371 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1372 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1373 tmp[i] = s1; 1374 } 1375 1376 /* backward solve the L^T */ 1377 for (i=n-1; i>=0; i--) { 1378 v = aa + diag[i] - 1; 1379 vi = aj + diag[i] - 1; 1380 nz = diag[i] - ai[i]; 1381 s1 = tmp[i]; 1382 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1383 } 1384 1385 /* copy tmp into x according to permutation */ 1386 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1387 1388 PetscCall(ISRestoreIndices(isrow,&rout)); 1389 PetscCall(ISRestoreIndices(iscol,&cout)); 1390 PetscCall(VecRestoreArrayRead(bb,&b)); 1391 PetscCall(VecRestoreArrayWrite(xx,&x)); 1392 1393 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1398 { 1399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1400 IS iscol = a->col,isrow = a->row; 1401 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1402 PetscInt i,n = A->rmap->n,j; 1403 PetscInt nz; 1404 PetscScalar *x,*tmp,s1; 1405 const MatScalar *aa = a->a,*v; 1406 const PetscScalar *b; 1407 1408 PetscFunctionBegin; 1409 PetscCall(VecGetArrayRead(bb,&b)); 1410 PetscCall(VecGetArrayWrite(xx,&x)); 1411 tmp = a->solve_work; 1412 1413 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1414 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1415 1416 /* copy the b into temp work space according to permutation */ 1417 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1418 1419 /* forward solve the U^T */ 1420 for (i=0; i<n; i++) { 1421 v = aa + adiag[i+1] + 1; 1422 vi = aj + adiag[i+1] + 1; 1423 nz = adiag[i] - adiag[i+1] - 1; 1424 s1 = tmp[i]; 1425 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1426 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1427 tmp[i] = s1; 1428 } 1429 1430 /* backward solve the L^T */ 1431 for (i=n-1; i>=0; i--) { 1432 v = aa + ai[i]; 1433 vi = aj + ai[i]; 1434 nz = ai[i+1] - ai[i]; 1435 s1 = tmp[i]; 1436 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1437 } 1438 1439 /* copy tmp into x according to permutation */ 1440 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1441 1442 PetscCall(ISRestoreIndices(isrow,&rout)); 1443 PetscCall(ISRestoreIndices(iscol,&cout)); 1444 PetscCall(VecRestoreArrayRead(bb,&b)); 1445 PetscCall(VecRestoreArrayWrite(xx,&x)); 1446 1447 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1452 { 1453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1454 IS iscol = a->col,isrow = a->row; 1455 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1456 PetscInt i,n = A->rmap->n,j; 1457 PetscInt nz; 1458 PetscScalar *x,*tmp,s1; 1459 const MatScalar *aa = a->a,*v; 1460 const PetscScalar *b; 1461 1462 PetscFunctionBegin; 1463 if (zz != xx) PetscCall(VecCopy(zz,xx)); 1464 PetscCall(VecGetArrayRead(bb,&b)); 1465 PetscCall(VecGetArray(xx,&x)); 1466 tmp = a->solve_work; 1467 1468 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1469 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1470 1471 /* copy the b into temp work space according to permutation */ 1472 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1473 1474 /* forward solve the U^T */ 1475 for (i=0; i<n; i++) { 1476 v = aa + diag[i]; 1477 vi = aj + diag[i] + 1; 1478 nz = ai[i+1] - diag[i] - 1; 1479 s1 = tmp[i]; 1480 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1481 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1482 tmp[i] = s1; 1483 } 1484 1485 /* backward solve the L^T */ 1486 for (i=n-1; i>=0; i--) { 1487 v = aa + diag[i] - 1; 1488 vi = aj + diag[i] - 1; 1489 nz = diag[i] - ai[i]; 1490 s1 = tmp[i]; 1491 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1492 } 1493 1494 /* copy tmp into x according to permutation */ 1495 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1496 1497 PetscCall(ISRestoreIndices(isrow,&rout)); 1498 PetscCall(ISRestoreIndices(iscol,&cout)); 1499 PetscCall(VecRestoreArrayRead(bb,&b)); 1500 PetscCall(VecRestoreArray(xx,&x)); 1501 1502 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1507 { 1508 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1509 IS iscol = a->col,isrow = a->row; 1510 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1511 PetscInt i,n = A->rmap->n,j; 1512 PetscInt nz; 1513 PetscScalar *x,*tmp,s1; 1514 const MatScalar *aa = a->a,*v; 1515 const PetscScalar *b; 1516 1517 PetscFunctionBegin; 1518 if (zz != xx) PetscCall(VecCopy(zz,xx)); 1519 PetscCall(VecGetArrayRead(bb,&b)); 1520 PetscCall(VecGetArray(xx,&x)); 1521 tmp = a->solve_work; 1522 1523 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1524 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1525 1526 /* copy the b into temp work space according to permutation */ 1527 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1528 1529 /* forward solve the U^T */ 1530 for (i=0; i<n; i++) { 1531 v = aa + adiag[i+1] + 1; 1532 vi = aj + adiag[i+1] + 1; 1533 nz = adiag[i] - adiag[i+1] - 1; 1534 s1 = tmp[i]; 1535 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1536 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1537 tmp[i] = s1; 1538 } 1539 1540 /* backward solve the L^T */ 1541 for (i=n-1; i>=0; i--) { 1542 v = aa + ai[i]; 1543 vi = aj + ai[i]; 1544 nz = ai[i+1] - ai[i]; 1545 s1 = tmp[i]; 1546 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1547 } 1548 1549 /* copy tmp into x according to permutation */ 1550 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1551 1552 PetscCall(ISRestoreIndices(isrow,&rout)); 1553 PetscCall(ISRestoreIndices(iscol,&cout)); 1554 PetscCall(VecRestoreArrayRead(bb,&b)); 1555 PetscCall(VecRestoreArray(xx,&x)); 1556 1557 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 /* ----------------------------------------------------------------*/ 1562 1563 /* 1564 ilu() under revised new data structure. 1565 Factored arrays bj and ba are stored as 1566 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1567 1568 bi=fact->i is an array of size n+1, in which 1569 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1570 bi[n]: points to L(n-1,n-1)+1 1571 1572 bdiag=fact->diag is an array of size n+1,in which 1573 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1574 bdiag[n]: points to entry of U(n-1,0)-1 1575 1576 U(i,:) contains bdiag[i] as its last entry, i.e., 1577 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1578 */ 1579 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1580 { 1581 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1582 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1583 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1584 IS isicol; 1585 1586 PetscFunctionBegin; 1587 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 1588 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE)); 1589 b = (Mat_SeqAIJ*)(fact)->data; 1590 1591 /* allocate matrix arrays for new data structure */ 1592 PetscCall(PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i)); 1593 PetscCall(PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt))); 1594 1595 b->singlemalloc = PETSC_TRUE; 1596 if (!b->diag) { 1597 PetscCall(PetscMalloc1(n+1,&b->diag)); 1598 PetscCall(PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt))); 1599 } 1600 bdiag = b->diag; 1601 1602 if (n > 0) { 1603 PetscCall(PetscArrayzero(b->a,ai[n])); 1604 } 1605 1606 /* set bi and bj with new data structure */ 1607 bi = b->i; 1608 bj = b->j; 1609 1610 /* L part */ 1611 bi[0] = 0; 1612 for (i=0; i<n; i++) { 1613 nz = adiag[i] - ai[i]; 1614 bi[i+1] = bi[i] + nz; 1615 aj = a->j + ai[i]; 1616 for (j=0; j<nz; j++) { 1617 bj[k++] = aj[j]; 1618 } 1619 } 1620 1621 /* U part */ 1622 bdiag[n] = bi[n]-1; 1623 for (i=n-1; i>=0; i--) { 1624 nz = ai[i+1] - adiag[i] - 1; 1625 aj = a->j + adiag[i] + 1; 1626 for (j=0; j<nz; j++) { 1627 bj[k++] = aj[j]; 1628 } 1629 /* diag[i] */ 1630 bj[k++] = i; 1631 bdiag[i] = bdiag[i+1] + nz + 1; 1632 } 1633 1634 fact->factortype = MAT_FACTOR_ILU; 1635 fact->info.factor_mallocs = 0; 1636 fact->info.fill_ratio_given = info->fill; 1637 fact->info.fill_ratio_needed = 1.0; 1638 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1639 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1640 1641 b = (Mat_SeqAIJ*)(fact)->data; 1642 b->row = isrow; 1643 b->col = iscol; 1644 b->icol = isicol; 1645 PetscCall(PetscMalloc1(fact->rmap->n+1,&b->solve_work)); 1646 PetscCall(PetscObjectReference((PetscObject)isrow)); 1647 PetscCall(PetscObjectReference((PetscObject)iscol)); 1648 PetscFunctionReturn(0); 1649 } 1650 1651 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1652 { 1653 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1654 IS isicol; 1655 const PetscInt *r,*ic; 1656 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1657 PetscInt *bi,*cols,nnz,*cols_lvl; 1658 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1659 PetscInt i,levels,diagonal_fill; 1660 PetscBool col_identity,row_identity,missing; 1661 PetscReal f; 1662 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1663 PetscBT lnkbt; 1664 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1665 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1666 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1667 1668 PetscFunctionBegin; 1669 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 1670 PetscCall(MatMissingDiagonal(A,&missing,&i)); 1671 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 1672 1673 levels = (PetscInt)info->levels; 1674 PetscCall(ISIdentity(isrow,&row_identity)); 1675 PetscCall(ISIdentity(iscol,&col_identity)); 1676 if (!levels && row_identity && col_identity) { 1677 /* special case: ilu(0) with natural ordering */ 1678 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info)); 1679 if (a->inode.size) { 1680 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1681 } 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 1686 PetscCall(ISGetIndices(isrow,&r)); 1687 PetscCall(ISGetIndices(isicol,&ic)); 1688 1689 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1690 PetscCall(PetscMalloc1(n+1,&bi)); 1691 PetscCall(PetscMalloc1(n+1,&bdiag)); 1692 bi[0] = bdiag[0] = 0; 1693 PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr)); 1694 1695 /* create a linked list for storing column indices of the active row */ 1696 nlnk = n + 1; 1697 PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt)); 1698 1699 /* initial FreeSpace size is f*(ai[n]+1) */ 1700 f = info->fill; 1701 diagonal_fill = (PetscInt)info->diagonal_fill; 1702 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 1703 current_space = free_space; 1704 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl)); 1705 current_space_lvl = free_space_lvl; 1706 for (i=0; i<n; i++) { 1707 nzi = 0; 1708 /* copy current row into linked list */ 1709 nnz = ai[r[i]+1] - ai[r[i]]; 1710 PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i); 1711 cols = aj + ai[r[i]]; 1712 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1713 PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt)); 1714 nzi += nlnk; 1715 1716 /* make sure diagonal entry is included */ 1717 if (diagonal_fill && lnk[i] == -1) { 1718 fm = n; 1719 while (lnk[fm] < i) fm = lnk[fm]; 1720 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1721 lnk[fm] = i; 1722 lnk_lvl[i] = 0; 1723 nzi++; dcount++; 1724 } 1725 1726 /* add pivot rows into the active row */ 1727 nzbd = 0; 1728 prow = lnk[n]; 1729 while (prow < i) { 1730 nnz = bdiag[prow]; 1731 cols = bj_ptr[prow] + nnz + 1; 1732 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1733 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1734 PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow)); 1735 nzi += nlnk; 1736 prow = lnk[prow]; 1737 nzbd++; 1738 } 1739 bdiag[i] = nzbd; 1740 bi[i+1] = bi[i] + nzi; 1741 /* if free space is not available, make more free space */ 1742 if (current_space->local_remaining<nzi) { 1743 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1744 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 1745 PetscCall(PetscFreeSpaceGet(nnz,¤t_space_lvl)); 1746 reallocs++; 1747 } 1748 1749 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1750 PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 1751 bj_ptr[i] = current_space->array; 1752 bjlvl_ptr[i] = current_space_lvl->array; 1753 1754 /* make sure the active row i has diagonal entry */ 1755 PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1756 1757 current_space->array += nzi; 1758 current_space->local_used += nzi; 1759 current_space->local_remaining -= nzi; 1760 current_space_lvl->array += nzi; 1761 current_space_lvl->local_used += nzi; 1762 current_space_lvl->local_remaining -= nzi; 1763 } 1764 1765 PetscCall(ISRestoreIndices(isrow,&r)); 1766 PetscCall(ISRestoreIndices(isicol,&ic)); 1767 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1768 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 1769 PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag)); 1770 1771 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 1772 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1773 PetscCall(PetscFree2(bj_ptr,bjlvl_ptr)); 1774 1775 #if defined(PETSC_USE_INFO) 1776 { 1777 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1778 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 1779 PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af)); 1780 PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af)); 1781 PetscCall(PetscInfo(A,"for best performance.\n")); 1782 if (diagonal_fill) { 1783 PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount)); 1784 } 1785 } 1786 #endif 1787 /* put together the new matrix */ 1788 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL)); 1789 PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol)); 1790 b = (Mat_SeqAIJ*)(fact)->data; 1791 1792 b->free_a = PETSC_TRUE; 1793 b->free_ij = PETSC_TRUE; 1794 b->singlemalloc = PETSC_FALSE; 1795 1796 PetscCall(PetscMalloc1(bdiag[0]+1,&b->a)); 1797 1798 b->j = bj; 1799 b->i = bi; 1800 b->diag = bdiag; 1801 b->ilen = NULL; 1802 b->imax = NULL; 1803 b->row = isrow; 1804 b->col = iscol; 1805 PetscCall(PetscObjectReference((PetscObject)isrow)); 1806 PetscCall(PetscObjectReference((PetscObject)iscol)); 1807 b->icol = isicol; 1808 1809 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 1810 /* In b structure: Free imax, ilen, old a, old j. 1811 Allocate bdiag, solve_work, new a, new j */ 1812 PetscCall(PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)))); 1813 b->maxnz = b->nz = bdiag[0]+1; 1814 1815 (fact)->info.factor_mallocs = reallocs; 1816 (fact)->info.fill_ratio_given = f; 1817 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1818 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1819 if (a->inode.size) { 1820 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1821 } 1822 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1827 { 1828 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1829 IS isicol; 1830 const PetscInt *r,*ic; 1831 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1832 PetscInt *bi,*cols,nnz,*cols_lvl; 1833 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1834 PetscInt i,levels,diagonal_fill; 1835 PetscBool col_identity,row_identity; 1836 PetscReal f; 1837 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1838 PetscBT lnkbt; 1839 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1840 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1841 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1842 PetscBool missing; 1843 1844 PetscFunctionBegin; 1845 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 1846 PetscCall(MatMissingDiagonal(A,&missing,&i)); 1847 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 1848 1849 f = info->fill; 1850 levels = (PetscInt)info->levels; 1851 diagonal_fill = (PetscInt)info->diagonal_fill; 1852 1853 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 1854 1855 PetscCall(ISIdentity(isrow,&row_identity)); 1856 PetscCall(ISIdentity(iscol,&col_identity)); 1857 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1858 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE)); 1859 1860 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1861 if (a->inode.size) { 1862 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1863 } 1864 fact->factortype = MAT_FACTOR_ILU; 1865 (fact)->info.factor_mallocs = 0; 1866 (fact)->info.fill_ratio_given = info->fill; 1867 (fact)->info.fill_ratio_needed = 1.0; 1868 1869 b = (Mat_SeqAIJ*)(fact)->data; 1870 b->row = isrow; 1871 b->col = iscol; 1872 b->icol = isicol; 1873 PetscCall(PetscMalloc1((fact)->rmap->n+1,&b->solve_work)); 1874 PetscCall(PetscObjectReference((PetscObject)isrow)); 1875 PetscCall(PetscObjectReference((PetscObject)iscol)); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 PetscCall(ISGetIndices(isrow,&r)); 1880 PetscCall(ISGetIndices(isicol,&ic)); 1881 1882 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1883 PetscCall(PetscMalloc1(n+1,&bi)); 1884 PetscCall(PetscMalloc1(n+1,&bdiag)); 1885 bi[0] = bdiag[0] = 0; 1886 1887 PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr)); 1888 1889 /* create a linked list for storing column indices of the active row */ 1890 nlnk = n + 1; 1891 PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt)); 1892 1893 /* initial FreeSpace size is f*(ai[n]+1) */ 1894 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 1895 current_space = free_space; 1896 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl)); 1897 current_space_lvl = free_space_lvl; 1898 1899 for (i=0; i<n; i++) { 1900 nzi = 0; 1901 /* copy current row into linked list */ 1902 nnz = ai[r[i]+1] - ai[r[i]]; 1903 PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i); 1904 cols = aj + ai[r[i]]; 1905 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1906 PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt)); 1907 nzi += nlnk; 1908 1909 /* make sure diagonal entry is included */ 1910 if (diagonal_fill && lnk[i] == -1) { 1911 fm = n; 1912 while (lnk[fm] < i) fm = lnk[fm]; 1913 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1914 lnk[fm] = i; 1915 lnk_lvl[i] = 0; 1916 nzi++; dcount++; 1917 } 1918 1919 /* add pivot rows into the active row */ 1920 nzbd = 0; 1921 prow = lnk[n]; 1922 while (prow < i) { 1923 nnz = bdiag[prow]; 1924 cols = bj_ptr[prow] + nnz + 1; 1925 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1926 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1927 PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow)); 1928 nzi += nlnk; 1929 prow = lnk[prow]; 1930 nzbd++; 1931 } 1932 bdiag[i] = nzbd; 1933 bi[i+1] = bi[i] + nzi; 1934 1935 /* if free space is not available, make more free space */ 1936 if (current_space->local_remaining<nzi) { 1937 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 1938 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 1939 PetscCall(PetscFreeSpaceGet(nnz,¤t_space_lvl)); 1940 reallocs++; 1941 } 1942 1943 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1944 PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 1945 bj_ptr[i] = current_space->array; 1946 bjlvl_ptr[i] = current_space_lvl->array; 1947 1948 /* make sure the active row i has diagonal entry */ 1949 PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1950 1951 current_space->array += nzi; 1952 current_space->local_used += nzi; 1953 current_space->local_remaining -= nzi; 1954 current_space_lvl->array += nzi; 1955 current_space_lvl->local_used += nzi; 1956 current_space_lvl->local_remaining -= nzi; 1957 } 1958 1959 PetscCall(ISRestoreIndices(isrow,&r)); 1960 PetscCall(ISRestoreIndices(isicol,&ic)); 1961 1962 /* destroy list of free space and other temporary arrays */ 1963 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 1964 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); /* copy free_space -> bj */ 1965 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 1966 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1967 PetscCall(PetscFree2(bj_ptr,bjlvl_ptr)); 1968 1969 #if defined(PETSC_USE_INFO) 1970 { 1971 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1972 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 1973 PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af)); 1974 PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af)); 1975 PetscCall(PetscInfo(A,"for best performance.\n")); 1976 if (diagonal_fill) { 1977 PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount)); 1978 } 1979 } 1980 #endif 1981 1982 /* put together the new matrix */ 1983 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL)); 1984 PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol)); 1985 b = (Mat_SeqAIJ*)(fact)->data; 1986 1987 b->free_a = PETSC_TRUE; 1988 b->free_ij = PETSC_TRUE; 1989 b->singlemalloc = PETSC_FALSE; 1990 1991 PetscCall(PetscMalloc1(bi[n],&b->a)); 1992 b->j = bj; 1993 b->i = bi; 1994 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1995 b->diag = bdiag; 1996 b->ilen = NULL; 1997 b->imax = NULL; 1998 b->row = isrow; 1999 b->col = iscol; 2000 PetscCall(PetscObjectReference((PetscObject)isrow)); 2001 PetscCall(PetscObjectReference((PetscObject)iscol)); 2002 b->icol = isicol; 2003 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 2004 /* In b structure: Free imax, ilen, old a, old j. 2005 Allocate bdiag, solve_work, new a, new j */ 2006 PetscCall(PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)))); 2007 b->maxnz = b->nz = bi[n]; 2008 2009 (fact)->info.factor_mallocs = reallocs; 2010 (fact)->info.fill_ratio_given = f; 2011 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2012 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2013 if (a->inode.size) { 2014 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2020 { 2021 Mat C = B; 2022 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2023 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2024 IS ip=b->row,iip = b->icol; 2025 const PetscInt *rip,*riip; 2026 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2027 PetscInt *ai=a->i,*aj=a->j; 2028 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2029 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2030 PetscBool perm_identity; 2031 FactorShiftCtx sctx; 2032 PetscReal rs; 2033 MatScalar d,*v; 2034 2035 PetscFunctionBegin; 2036 /* MatPivotSetUp(): initialize shift context sctx */ 2037 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 2038 2039 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2040 sctx.shift_top = info->zeropivot; 2041 for (i=0; i<mbs; i++) { 2042 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2043 d = (aa)[a->diag[i]]; 2044 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2045 v = aa+ai[i]; 2046 nz = ai[i+1] - ai[i]; 2047 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2048 if (rs>sctx.shift_top) sctx.shift_top = rs; 2049 } 2050 sctx.shift_top *= 1.1; 2051 sctx.nshift_max = 5; 2052 sctx.shift_lo = 0.; 2053 sctx.shift_hi = 1.; 2054 } 2055 2056 PetscCall(ISGetIndices(ip,&rip)); 2057 PetscCall(ISGetIndices(iip,&riip)); 2058 2059 /* allocate working arrays 2060 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2061 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2062 */ 2063 PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r)); 2064 2065 do { 2066 sctx.newshift = PETSC_FALSE; 2067 2068 for (i=0; i<mbs; i++) c2r[i] = mbs; 2069 if (mbs) il[0] = 0; 2070 2071 for (k = 0; k<mbs; k++) { 2072 /* zero rtmp */ 2073 nz = bi[k+1] - bi[k]; 2074 bjtmp = bj + bi[k]; 2075 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2076 2077 /* load in initial unfactored row */ 2078 bval = ba + bi[k]; 2079 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2080 for (j = jmin; j < jmax; j++) { 2081 col = riip[aj[j]]; 2082 if (col >= k) { /* only take upper triangular entry */ 2083 rtmp[col] = aa[j]; 2084 *bval++ = 0.0; /* for in-place factorization */ 2085 } 2086 } 2087 /* shift the diagonal of the matrix: ZeropivotApply() */ 2088 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2089 2090 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2091 dk = rtmp[k]; 2092 i = c2r[k]; /* first row to be added to k_th row */ 2093 2094 while (i < k) { 2095 nexti = c2r[i]; /* next row to be added to k_th row */ 2096 2097 /* compute multiplier, update diag(k) and U(i,k) */ 2098 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2099 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2100 dk += uikdi*ba[ili]; /* update diag[k] */ 2101 ba[ili] = uikdi; /* -U(i,k) */ 2102 2103 /* add multiple of row i to k-th row */ 2104 jmin = ili + 1; jmax = bi[i+1]; 2105 if (jmin < jmax) { 2106 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2107 /* update il and c2r for row i */ 2108 il[i] = jmin; 2109 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2110 } 2111 i = nexti; 2112 } 2113 2114 /* copy data into U(k,:) */ 2115 rs = 0.0; 2116 jmin = bi[k]; jmax = bi[k+1]-1; 2117 if (jmin < jmax) { 2118 for (j=jmin; j<jmax; j++) { 2119 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2120 } 2121 /* add the k-th row into il and c2r */ 2122 il[k] = jmin; 2123 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2124 } 2125 2126 /* MatPivotCheck() */ 2127 sctx.rs = rs; 2128 sctx.pv = dk; 2129 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 2130 if (sctx.newshift) break; 2131 dk = sctx.pv; 2132 2133 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2134 } 2135 } while (sctx.newshift); 2136 2137 PetscCall(PetscFree3(rtmp,il,c2r)); 2138 PetscCall(ISRestoreIndices(ip,&rip)); 2139 PetscCall(ISRestoreIndices(iip,&riip)); 2140 2141 PetscCall(ISIdentity(ip,&perm_identity)); 2142 if (perm_identity) { 2143 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2144 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2145 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2146 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2147 } else { 2148 B->ops->solve = MatSolve_SeqSBAIJ_1; 2149 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2150 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2151 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2152 } 2153 2154 C->assembled = PETSC_TRUE; 2155 C->preallocated = PETSC_TRUE; 2156 2157 PetscCall(PetscLogFlops(C->rmap->n)); 2158 2159 /* MatPivotView() */ 2160 if (sctx.nshift) { 2161 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2162 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 2163 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2164 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 2165 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2166 PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount)); 2167 } 2168 } 2169 PetscFunctionReturn(0); 2170 } 2171 2172 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2173 { 2174 Mat C = B; 2175 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2176 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2177 IS ip=b->row,iip = b->icol; 2178 const PetscInt *rip,*riip; 2179 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2180 PetscInt *ai=a->i,*aj=a->j; 2181 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2182 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2183 PetscBool perm_identity; 2184 FactorShiftCtx sctx; 2185 PetscReal rs; 2186 MatScalar d,*v; 2187 2188 PetscFunctionBegin; 2189 /* MatPivotSetUp(): initialize shift context sctx */ 2190 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 2191 2192 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2193 sctx.shift_top = info->zeropivot; 2194 for (i=0; i<mbs; i++) { 2195 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2196 d = (aa)[a->diag[i]]; 2197 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2198 v = aa+ai[i]; 2199 nz = ai[i+1] - ai[i]; 2200 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2201 if (rs>sctx.shift_top) sctx.shift_top = rs; 2202 } 2203 sctx.shift_top *= 1.1; 2204 sctx.nshift_max = 5; 2205 sctx.shift_lo = 0.; 2206 sctx.shift_hi = 1.; 2207 } 2208 2209 PetscCall(ISGetIndices(ip,&rip)); 2210 PetscCall(ISGetIndices(iip,&riip)); 2211 2212 /* initialization */ 2213 PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl)); 2214 2215 do { 2216 sctx.newshift = PETSC_FALSE; 2217 2218 for (i=0; i<mbs; i++) jl[i] = mbs; 2219 il[0] = 0; 2220 2221 for (k = 0; k<mbs; k++) { 2222 /* zero rtmp */ 2223 nz = bi[k+1] - bi[k]; 2224 bjtmp = bj + bi[k]; 2225 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2226 2227 bval = ba + bi[k]; 2228 /* initialize k-th row by the perm[k]-th row of A */ 2229 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2230 for (j = jmin; j < jmax; j++) { 2231 col = riip[aj[j]]; 2232 if (col >= k) { /* only take upper triangular entry */ 2233 rtmp[col] = aa[j]; 2234 *bval++ = 0.0; /* for in-place factorization */ 2235 } 2236 } 2237 /* shift the diagonal of the matrix */ 2238 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2239 2240 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2241 dk = rtmp[k]; 2242 i = jl[k]; /* first row to be added to k_th row */ 2243 2244 while (i < k) { 2245 nexti = jl[i]; /* next row to be added to k_th row */ 2246 2247 /* compute multiplier, update diag(k) and U(i,k) */ 2248 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2249 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2250 dk += uikdi*ba[ili]; 2251 ba[ili] = uikdi; /* -U(i,k) */ 2252 2253 /* add multiple of row i to k-th row */ 2254 jmin = ili + 1; jmax = bi[i+1]; 2255 if (jmin < jmax) { 2256 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2257 /* update il and jl for row i */ 2258 il[i] = jmin; 2259 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2260 } 2261 i = nexti; 2262 } 2263 2264 /* shift the diagonals when zero pivot is detected */ 2265 /* compute rs=sum of abs(off-diagonal) */ 2266 rs = 0.0; 2267 jmin = bi[k]+1; 2268 nz = bi[k+1] - jmin; 2269 bcol = bj + jmin; 2270 for (j=0; j<nz; j++) { 2271 rs += PetscAbsScalar(rtmp[bcol[j]]); 2272 } 2273 2274 sctx.rs = rs; 2275 sctx.pv = dk; 2276 PetscCall(MatPivotCheck(B,A,info,&sctx,k)); 2277 if (sctx.newshift) break; 2278 dk = sctx.pv; 2279 2280 /* copy data into U(k,:) */ 2281 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2282 jmin = bi[k]+1; jmax = bi[k+1]; 2283 if (jmin < jmax) { 2284 for (j=jmin; j<jmax; j++) { 2285 col = bj[j]; ba[j] = rtmp[col]; 2286 } 2287 /* add the k-th row into il and jl */ 2288 il[k] = jmin; 2289 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2290 } 2291 } 2292 } while (sctx.newshift); 2293 2294 PetscCall(PetscFree3(rtmp,il,jl)); 2295 PetscCall(ISRestoreIndices(ip,&rip)); 2296 PetscCall(ISRestoreIndices(iip,&riip)); 2297 2298 PetscCall(ISIdentity(ip,&perm_identity)); 2299 if (perm_identity) { 2300 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2301 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2302 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2303 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2304 } else { 2305 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2306 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2307 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2308 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2309 } 2310 2311 C->assembled = PETSC_TRUE; 2312 C->preallocated = PETSC_TRUE; 2313 2314 PetscCall(PetscLogFlops(C->rmap->n)); 2315 if (sctx.nshift) { 2316 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2317 PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 2318 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2319 PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 2320 } 2321 } 2322 PetscFunctionReturn(0); 2323 } 2324 2325 /* 2326 icc() under revised new data structure. 2327 Factored arrays bj and ba are stored as 2328 U(0,:),...,U(i,:),U(n-1,:) 2329 2330 ui=fact->i is an array of size n+1, in which 2331 ui+ 2332 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2333 ui[n]: points to U(n-1,n-1)+1 2334 2335 udiag=fact->diag is an array of size n,in which 2336 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2337 2338 U(i,:) contains udiag[i] as its last entry, i.e., 2339 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2340 */ 2341 2342 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2343 { 2344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2345 Mat_SeqSBAIJ *b; 2346 PetscBool perm_identity,missing; 2347 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2348 const PetscInt *rip,*riip; 2349 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2350 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2351 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2352 PetscReal fill =info->fill,levels=info->levels; 2353 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2354 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2355 PetscBT lnkbt; 2356 IS iperm; 2357 2358 PetscFunctionBegin; 2359 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2360 PetscCall(MatMissingDiagonal(A,&missing,&d)); 2361 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d); 2362 PetscCall(ISIdentity(perm,&perm_identity)); 2363 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2364 2365 PetscCall(PetscMalloc1(am+1,&ui)); 2366 PetscCall(PetscMalloc1(am+1,&udiag)); 2367 ui[0] = 0; 2368 2369 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2370 if (!levels && perm_identity) { 2371 for (i=0; i<am; i++) { 2372 ncols = ai[i+1] - a->diag[i]; 2373 ui[i+1] = ui[i] + ncols; 2374 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2375 } 2376 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2377 cols = uj; 2378 for (i=0; i<am; i++) { 2379 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2380 ncols = ai[i+1] - a->diag[i] -1; 2381 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2382 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2383 } 2384 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2385 PetscCall(ISGetIndices(iperm,&riip)); 2386 PetscCall(ISGetIndices(perm,&rip)); 2387 2388 /* initialization */ 2389 PetscCall(PetscMalloc1(am+1,&ajtmp)); 2390 2391 /* jl: linked list for storing indices of the pivot rows 2392 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2393 PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il)); 2394 for (i=0; i<am; i++) { 2395 jl[i] = am; il[i] = 0; 2396 } 2397 2398 /* create and initialize a linked list for storing column indices of the active row k */ 2399 nlnk = am + 1; 2400 PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt)); 2401 2402 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2403 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space)); 2404 current_space = free_space; 2405 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl)); 2406 current_space_lvl = free_space_lvl; 2407 2408 for (k=0; k<am; k++) { /* for each active row k */ 2409 /* initialize lnk by the column indices of row rip[k] of A */ 2410 nzk = 0; 2411 ncols = ai[rip[k]+1] - ai[rip[k]]; 2412 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2413 ncols_upper = 0; 2414 for (j=0; j<ncols; j++) { 2415 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2416 if (riip[i] >= k) { /* only take upper triangular entry */ 2417 ajtmp[ncols_upper] = i; 2418 ncols_upper++; 2419 } 2420 } 2421 PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt)); 2422 nzk += nlnk; 2423 2424 /* update lnk by computing fill-in for each pivot row to be merged in */ 2425 prow = jl[k]; /* 1st pivot row */ 2426 2427 while (prow < k) { 2428 nextprow = jl[prow]; 2429 2430 /* merge prow into k-th row */ 2431 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2432 jmax = ui[prow+1]; 2433 ncols = jmax-jmin; 2434 i = jmin - ui[prow]; 2435 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2436 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2437 j = *(uj - 1); 2438 PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j)); 2439 nzk += nlnk; 2440 2441 /* update il and jl for prow */ 2442 if (jmin < jmax) { 2443 il[prow] = jmin; 2444 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2445 } 2446 prow = nextprow; 2447 } 2448 2449 /* if free space is not available, make more free space */ 2450 if (current_space->local_remaining<nzk) { 2451 i = am - k + 1; /* num of unfactored rows */ 2452 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2453 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 2454 PetscCall(PetscFreeSpaceGet(i,¤t_space_lvl)); 2455 reallocs++; 2456 } 2457 2458 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2459 PetscCheck(nzk != 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k); 2460 PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 2461 2462 /* add the k-th row into il and jl */ 2463 if (nzk > 1) { 2464 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2465 jl[k] = jl[i]; jl[i] = k; 2466 il[k] = ui[k] + 1; 2467 } 2468 uj_ptr[k] = current_space->array; 2469 uj_lvl_ptr[k] = current_space_lvl->array; 2470 2471 current_space->array += nzk; 2472 current_space->local_used += nzk; 2473 current_space->local_remaining -= nzk; 2474 2475 current_space_lvl->array += nzk; 2476 current_space_lvl->local_used += nzk; 2477 current_space_lvl->local_remaining -= nzk; 2478 2479 ui[k+1] = ui[k] + nzk; 2480 } 2481 2482 PetscCall(ISRestoreIndices(perm,&rip)); 2483 PetscCall(ISRestoreIndices(iperm,&riip)); 2484 PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il)); 2485 PetscCall(PetscFree(ajtmp)); 2486 2487 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2488 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2489 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor */ 2490 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 2491 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2492 2493 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2494 2495 /* put together the new matrix in MATSEQSBAIJ format */ 2496 b = (Mat_SeqSBAIJ*)(fact)->data; 2497 b->singlemalloc = PETSC_FALSE; 2498 2499 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 2500 2501 b->j = uj; 2502 b->i = ui; 2503 b->diag = udiag; 2504 b->free_diag = PETSC_TRUE; 2505 b->ilen = NULL; 2506 b->imax = NULL; 2507 b->row = perm; 2508 b->col = perm; 2509 PetscCall(PetscObjectReference((PetscObject)perm)); 2510 PetscCall(PetscObjectReference((PetscObject)perm)); 2511 b->icol = iperm; 2512 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2513 2514 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 2515 PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)))); 2516 2517 b->maxnz = b->nz = ui[am]; 2518 b->free_a = PETSC_TRUE; 2519 b->free_ij = PETSC_TRUE; 2520 2521 fact->info.factor_mallocs = reallocs; 2522 fact->info.fill_ratio_given = fill; 2523 if (ai[am] != 0) { 2524 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2525 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2526 } else { 2527 fact->info.fill_ratio_needed = 0.0; 2528 } 2529 #if defined(PETSC_USE_INFO) 2530 if (ai[am] != 0) { 2531 PetscReal af = fact->info.fill_ratio_needed; 2532 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 2533 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 2534 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 2535 } else { 2536 PetscCall(PetscInfo(A,"Empty matrix\n")); 2537 } 2538 #endif 2539 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2540 PetscFunctionReturn(0); 2541 } 2542 2543 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2544 { 2545 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2546 Mat_SeqSBAIJ *b; 2547 PetscBool perm_identity,missing; 2548 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2549 const PetscInt *rip,*riip; 2550 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2551 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2552 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2553 PetscReal fill =info->fill,levels=info->levels; 2554 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2555 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2556 PetscBT lnkbt; 2557 IS iperm; 2558 2559 PetscFunctionBegin; 2560 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2561 PetscCall(MatMissingDiagonal(A,&missing,&d)); 2562 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d); 2563 PetscCall(ISIdentity(perm,&perm_identity)); 2564 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2565 2566 PetscCall(PetscMalloc1(am+1,&ui)); 2567 PetscCall(PetscMalloc1(am+1,&udiag)); 2568 ui[0] = 0; 2569 2570 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2571 if (!levels && perm_identity) { 2572 2573 for (i=0; i<am; i++) { 2574 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2575 udiag[i] = ui[i]; 2576 } 2577 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2578 cols = uj; 2579 for (i=0; i<am; i++) { 2580 aj = a->j + a->diag[i]; 2581 ncols = ui[i+1] - ui[i]; 2582 for (j=0; j<ncols; j++) *cols++ = *aj++; 2583 } 2584 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2585 PetscCall(ISGetIndices(iperm,&riip)); 2586 PetscCall(ISGetIndices(perm,&rip)); 2587 2588 /* initialization */ 2589 PetscCall(PetscMalloc1(am+1,&ajtmp)); 2590 2591 /* jl: linked list for storing indices of the pivot rows 2592 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2593 PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il)); 2594 for (i=0; i<am; i++) { 2595 jl[i] = am; il[i] = 0; 2596 } 2597 2598 /* create and initialize a linked list for storing column indices of the active row k */ 2599 nlnk = am + 1; 2600 PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt)); 2601 2602 /* initial FreeSpace size is fill*(ai[am]+1) */ 2603 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space)); 2604 current_space = free_space; 2605 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl)); 2606 current_space_lvl = free_space_lvl; 2607 2608 for (k=0; k<am; k++) { /* for each active row k */ 2609 /* initialize lnk by the column indices of row rip[k] of A */ 2610 nzk = 0; 2611 ncols = ai[rip[k]+1] - ai[rip[k]]; 2612 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2613 ncols_upper = 0; 2614 for (j=0; j<ncols; j++) { 2615 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2616 if (riip[i] >= k) { /* only take upper triangular entry */ 2617 ajtmp[ncols_upper] = i; 2618 ncols_upper++; 2619 } 2620 } 2621 PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt)); 2622 nzk += nlnk; 2623 2624 /* update lnk by computing fill-in for each pivot row to be merged in */ 2625 prow = jl[k]; /* 1st pivot row */ 2626 2627 while (prow < k) { 2628 nextprow = jl[prow]; 2629 2630 /* merge prow into k-th row */ 2631 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2632 jmax = ui[prow+1]; 2633 ncols = jmax-jmin; 2634 i = jmin - ui[prow]; 2635 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2636 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2637 j = *(uj - 1); 2638 PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j)); 2639 nzk += nlnk; 2640 2641 /* update il and jl for prow */ 2642 if (jmin < jmax) { 2643 il[prow] = jmin; 2644 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2645 } 2646 prow = nextprow; 2647 } 2648 2649 /* if free space is not available, make more free space */ 2650 if (current_space->local_remaining<nzk) { 2651 i = am - k + 1; /* num of unfactored rows */ 2652 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2653 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 2654 PetscCall(PetscFreeSpaceGet(i,¤t_space_lvl)); 2655 reallocs++; 2656 } 2657 2658 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2659 PetscCheck(nzk,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k); 2660 PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 2661 2662 /* add the k-th row into il and jl */ 2663 if (nzk > 1) { 2664 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2665 jl[k] = jl[i]; jl[i] = k; 2666 il[k] = ui[k] + 1; 2667 } 2668 uj_ptr[k] = current_space->array; 2669 uj_lvl_ptr[k] = current_space_lvl->array; 2670 2671 current_space->array += nzk; 2672 current_space->local_used += nzk; 2673 current_space->local_remaining -= nzk; 2674 2675 current_space_lvl->array += nzk; 2676 current_space_lvl->local_used += nzk; 2677 current_space_lvl->local_remaining -= nzk; 2678 2679 ui[k+1] = ui[k] + nzk; 2680 } 2681 2682 #if defined(PETSC_USE_INFO) 2683 if (ai[am] != 0) { 2684 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2685 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 2686 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 2687 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 2688 } else { 2689 PetscCall(PetscInfo(A,"Empty matrix\n")); 2690 } 2691 #endif 2692 2693 PetscCall(ISRestoreIndices(perm,&rip)); 2694 PetscCall(ISRestoreIndices(iperm,&riip)); 2695 PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il)); 2696 PetscCall(PetscFree(ajtmp)); 2697 2698 /* destroy list of free space and other temporary array(s) */ 2699 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2700 PetscCall(PetscFreeSpaceContiguous(&free_space,uj)); 2701 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 2702 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2703 2704 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2705 2706 /* put together the new matrix in MATSEQSBAIJ format */ 2707 2708 b = (Mat_SeqSBAIJ*)fact->data; 2709 b->singlemalloc = PETSC_FALSE; 2710 2711 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 2712 2713 b->j = uj; 2714 b->i = ui; 2715 b->diag = udiag; 2716 b->free_diag = PETSC_TRUE; 2717 b->ilen = NULL; 2718 b->imax = NULL; 2719 b->row = perm; 2720 b->col = perm; 2721 2722 PetscCall(PetscObjectReference((PetscObject)perm)); 2723 PetscCall(PetscObjectReference((PetscObject)perm)); 2724 2725 b->icol = iperm; 2726 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2727 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 2728 PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)))); 2729 b->maxnz = b->nz = ui[am]; 2730 b->free_a = PETSC_TRUE; 2731 b->free_ij = PETSC_TRUE; 2732 2733 fact->info.factor_mallocs = reallocs; 2734 fact->info.fill_ratio_given = fill; 2735 if (ai[am] != 0) { 2736 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2737 } else { 2738 fact->info.fill_ratio_needed = 0.0; 2739 } 2740 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2741 PetscFunctionReturn(0); 2742 } 2743 2744 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2745 { 2746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2747 Mat_SeqSBAIJ *b; 2748 PetscBool perm_identity,missing; 2749 PetscReal fill = info->fill; 2750 const PetscInt *rip,*riip; 2751 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2752 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2753 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2754 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2755 PetscBT lnkbt; 2756 IS iperm; 2757 2758 PetscFunctionBegin; 2759 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2760 PetscCall(MatMissingDiagonal(A,&missing,&i)); 2761 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 2762 2763 /* check whether perm is the identity mapping */ 2764 PetscCall(ISIdentity(perm,&perm_identity)); 2765 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2766 PetscCall(ISGetIndices(iperm,&riip)); 2767 PetscCall(ISGetIndices(perm,&rip)); 2768 2769 /* initialization */ 2770 PetscCall(PetscMalloc1(am+1,&ui)); 2771 PetscCall(PetscMalloc1(am+1,&udiag)); 2772 ui[0] = 0; 2773 2774 /* jl: linked list for storing indices of the pivot rows 2775 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2776 PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols)); 2777 for (i=0; i<am; i++) { 2778 jl[i] = am; il[i] = 0; 2779 } 2780 2781 /* create and initialize a linked list for storing column indices of the active row k */ 2782 nlnk = am + 1; 2783 PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt)); 2784 2785 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2786 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space)); 2787 current_space = free_space; 2788 2789 for (k=0; k<am; k++) { /* for each active row k */ 2790 /* initialize lnk by the column indices of row rip[k] of A */ 2791 nzk = 0; 2792 ncols = ai[rip[k]+1] - ai[rip[k]]; 2793 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2794 ncols_upper = 0; 2795 for (j=0; j<ncols; j++) { 2796 i = riip[*(aj + ai[rip[k]] + j)]; 2797 if (i >= k) { /* only take upper triangular entry */ 2798 cols[ncols_upper] = i; 2799 ncols_upper++; 2800 } 2801 } 2802 PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt)); 2803 nzk += nlnk; 2804 2805 /* update lnk by computing fill-in for each pivot row to be merged in */ 2806 prow = jl[k]; /* 1st pivot row */ 2807 2808 while (prow < k) { 2809 nextprow = jl[prow]; 2810 /* merge prow into k-th row */ 2811 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2812 jmax = ui[prow+1]; 2813 ncols = jmax-jmin; 2814 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2815 PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt)); 2816 nzk += nlnk; 2817 2818 /* update il and jl for prow */ 2819 if (jmin < jmax) { 2820 il[prow] = jmin; 2821 j = *uj_ptr; 2822 jl[prow] = jl[j]; 2823 jl[j] = prow; 2824 } 2825 prow = nextprow; 2826 } 2827 2828 /* if free space is not available, make more free space */ 2829 if (current_space->local_remaining<nzk) { 2830 i = am - k + 1; /* num of unfactored rows */ 2831 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2832 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 2833 reallocs++; 2834 } 2835 2836 /* copy data into free space, then initialize lnk */ 2837 PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt)); 2838 2839 /* add the k-th row into il and jl */ 2840 if (nzk > 1) { 2841 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2842 jl[k] = jl[i]; jl[i] = k; 2843 il[k] = ui[k] + 1; 2844 } 2845 ui_ptr[k] = current_space->array; 2846 2847 current_space->array += nzk; 2848 current_space->local_used += nzk; 2849 current_space->local_remaining -= nzk; 2850 2851 ui[k+1] = ui[k] + nzk; 2852 } 2853 2854 PetscCall(ISRestoreIndices(perm,&rip)); 2855 PetscCall(ISRestoreIndices(iperm,&riip)); 2856 PetscCall(PetscFree4(ui_ptr,jl,il,cols)); 2857 2858 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2859 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2860 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor */ 2861 PetscCall(PetscLLDestroy(lnk,lnkbt)); 2862 2863 /* put together the new matrix in MATSEQSBAIJ format */ 2864 2865 b = (Mat_SeqSBAIJ*)fact->data; 2866 b->singlemalloc = PETSC_FALSE; 2867 b->free_a = PETSC_TRUE; 2868 b->free_ij = PETSC_TRUE; 2869 2870 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 2871 2872 b->j = uj; 2873 b->i = ui; 2874 b->diag = udiag; 2875 b->free_diag = PETSC_TRUE; 2876 b->ilen = NULL; 2877 b->imax = NULL; 2878 b->row = perm; 2879 b->col = perm; 2880 2881 PetscCall(PetscObjectReference((PetscObject)perm)); 2882 PetscCall(PetscObjectReference((PetscObject)perm)); 2883 2884 b->icol = iperm; 2885 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2886 2887 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 2888 PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)))); 2889 2890 b->maxnz = b->nz = ui[am]; 2891 2892 fact->info.factor_mallocs = reallocs; 2893 fact->info.fill_ratio_given = fill; 2894 if (ai[am] != 0) { 2895 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2896 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2897 } else { 2898 fact->info.fill_ratio_needed = 0.0; 2899 } 2900 #if defined(PETSC_USE_INFO) 2901 if (ai[am] != 0) { 2902 PetscReal af = fact->info.fill_ratio_needed; 2903 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 2904 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 2905 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 2906 } else { 2907 PetscCall(PetscInfo(A,"Empty matrix\n")); 2908 } 2909 #endif 2910 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2911 PetscFunctionReturn(0); 2912 } 2913 2914 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2915 { 2916 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2917 Mat_SeqSBAIJ *b; 2918 PetscBool perm_identity,missing; 2919 PetscReal fill = info->fill; 2920 const PetscInt *rip,*riip; 2921 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2922 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2923 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2924 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2925 PetscBT lnkbt; 2926 IS iperm; 2927 2928 PetscFunctionBegin; 2929 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2930 PetscCall(MatMissingDiagonal(A,&missing,&i)); 2931 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 2932 2933 /* check whether perm is the identity mapping */ 2934 PetscCall(ISIdentity(perm,&perm_identity)); 2935 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2936 PetscCall(ISGetIndices(iperm,&riip)); 2937 PetscCall(ISGetIndices(perm,&rip)); 2938 2939 /* initialization */ 2940 PetscCall(PetscMalloc1(am+1,&ui)); 2941 ui[0] = 0; 2942 2943 /* jl: linked list for storing indices of the pivot rows 2944 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2945 PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols)); 2946 for (i=0; i<am; i++) { 2947 jl[i] = am; il[i] = 0; 2948 } 2949 2950 /* create and initialize a linked list for storing column indices of the active row k */ 2951 nlnk = am + 1; 2952 PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt)); 2953 2954 /* initial FreeSpace size is fill*(ai[am]+1) */ 2955 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space)); 2956 current_space = free_space; 2957 2958 for (k=0; k<am; k++) { /* for each active row k */ 2959 /* initialize lnk by the column indices of row rip[k] of A */ 2960 nzk = 0; 2961 ncols = ai[rip[k]+1] - ai[rip[k]]; 2962 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2963 ncols_upper = 0; 2964 for (j=0; j<ncols; j++) { 2965 i = riip[*(aj + ai[rip[k]] + j)]; 2966 if (i >= k) { /* only take upper triangular entry */ 2967 cols[ncols_upper] = i; 2968 ncols_upper++; 2969 } 2970 } 2971 PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt)); 2972 nzk += nlnk; 2973 2974 /* update lnk by computing fill-in for each pivot row to be merged in */ 2975 prow = jl[k]; /* 1st pivot row */ 2976 2977 while (prow < k) { 2978 nextprow = jl[prow]; 2979 /* merge prow into k-th row */ 2980 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2981 jmax = ui[prow+1]; 2982 ncols = jmax-jmin; 2983 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2984 PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt)); 2985 nzk += nlnk; 2986 2987 /* update il and jl for prow */ 2988 if (jmin < jmax) { 2989 il[prow] = jmin; 2990 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2991 } 2992 prow = nextprow; 2993 } 2994 2995 /* if free space is not available, make more free space */ 2996 if (current_space->local_remaining<nzk) { 2997 i = am - k + 1; /* num of unfactored rows */ 2998 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2999 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 3000 reallocs++; 3001 } 3002 3003 /* copy data into free space, then initialize lnk */ 3004 PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt)); 3005 3006 /* add the k-th row into il and jl */ 3007 if (nzk-1 > 0) { 3008 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3009 jl[k] = jl[i]; jl[i] = k; 3010 il[k] = ui[k] + 1; 3011 } 3012 ui_ptr[k] = current_space->array; 3013 3014 current_space->array += nzk; 3015 current_space->local_used += nzk; 3016 current_space->local_remaining -= nzk; 3017 3018 ui[k+1] = ui[k] + nzk; 3019 } 3020 3021 #if defined(PETSC_USE_INFO) 3022 if (ai[am] != 0) { 3023 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3024 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 3025 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 3026 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 3027 } else { 3028 PetscCall(PetscInfo(A,"Empty matrix\n")); 3029 } 3030 #endif 3031 3032 PetscCall(ISRestoreIndices(perm,&rip)); 3033 PetscCall(ISRestoreIndices(iperm,&riip)); 3034 PetscCall(PetscFree4(ui_ptr,jl,il,cols)); 3035 3036 /* destroy list of free space and other temporary array(s) */ 3037 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 3038 PetscCall(PetscFreeSpaceContiguous(&free_space,uj)); 3039 PetscCall(PetscLLDestroy(lnk,lnkbt)); 3040 3041 /* put together the new matrix in MATSEQSBAIJ format */ 3042 3043 b = (Mat_SeqSBAIJ*)fact->data; 3044 b->singlemalloc = PETSC_FALSE; 3045 b->free_a = PETSC_TRUE; 3046 b->free_ij = PETSC_TRUE; 3047 3048 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 3049 3050 b->j = uj; 3051 b->i = ui; 3052 b->diag = NULL; 3053 b->ilen = NULL; 3054 b->imax = NULL; 3055 b->row = perm; 3056 b->col = perm; 3057 3058 PetscCall(PetscObjectReference((PetscObject)perm)); 3059 PetscCall(PetscObjectReference((PetscObject)perm)); 3060 3061 b->icol = iperm; 3062 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3063 3064 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 3065 PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)))); 3066 b->maxnz = b->nz = ui[am]; 3067 3068 fact->info.factor_mallocs = reallocs; 3069 fact->info.fill_ratio_given = fill; 3070 if (ai[am] != 0) { 3071 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3072 } else { 3073 fact->info.fill_ratio_needed = 0.0; 3074 } 3075 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3076 PetscFunctionReturn(0); 3077 } 3078 3079 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3080 { 3081 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3082 PetscInt n = A->rmap->n; 3083 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3084 PetscScalar *x,sum; 3085 const PetscScalar *b; 3086 const MatScalar *aa = a->a,*v; 3087 PetscInt i,nz; 3088 3089 PetscFunctionBegin; 3090 if (!n) PetscFunctionReturn(0); 3091 3092 PetscCall(VecGetArrayRead(bb,&b)); 3093 PetscCall(VecGetArrayWrite(xx,&x)); 3094 3095 /* forward solve the lower triangular */ 3096 x[0] = b[0]; 3097 v = aa; 3098 vi = aj; 3099 for (i=1; i<n; i++) { 3100 nz = ai[i+1] - ai[i]; 3101 sum = b[i]; 3102 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3103 v += nz; 3104 vi += nz; 3105 x[i] = sum; 3106 } 3107 3108 /* backward solve the upper triangular */ 3109 for (i=n-1; i>=0; i--) { 3110 v = aa + adiag[i+1] + 1; 3111 vi = aj + adiag[i+1] + 1; 3112 nz = adiag[i] - adiag[i+1]-1; 3113 sum = x[i]; 3114 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3115 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3116 } 3117 3118 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 3119 PetscCall(VecRestoreArrayRead(bb,&b)); 3120 PetscCall(VecRestoreArrayWrite(xx,&x)); 3121 PetscFunctionReturn(0); 3122 } 3123 3124 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3125 { 3126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3127 IS iscol = a->col,isrow = a->row; 3128 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3129 const PetscInt *rout,*cout,*r,*c; 3130 PetscScalar *x,*tmp,sum; 3131 const PetscScalar *b; 3132 const MatScalar *aa = a->a,*v; 3133 3134 PetscFunctionBegin; 3135 if (!n) PetscFunctionReturn(0); 3136 3137 PetscCall(VecGetArrayRead(bb,&b)); 3138 PetscCall(VecGetArrayWrite(xx,&x)); 3139 tmp = a->solve_work; 3140 3141 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 3142 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 3143 3144 /* forward solve the lower triangular */ 3145 tmp[0] = b[r[0]]; 3146 v = aa; 3147 vi = aj; 3148 for (i=1; i<n; i++) { 3149 nz = ai[i+1] - ai[i]; 3150 sum = b[r[i]]; 3151 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3152 tmp[i] = sum; 3153 v += nz; vi += nz; 3154 } 3155 3156 /* backward solve the upper triangular */ 3157 for (i=n-1; i>=0; i--) { 3158 v = aa + adiag[i+1]+1; 3159 vi = aj + adiag[i+1]+1; 3160 nz = adiag[i]-adiag[i+1]-1; 3161 sum = tmp[i]; 3162 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3163 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3164 } 3165 3166 PetscCall(ISRestoreIndices(isrow,&rout)); 3167 PetscCall(ISRestoreIndices(iscol,&cout)); 3168 PetscCall(VecRestoreArrayRead(bb,&b)); 3169 PetscCall(VecRestoreArrayWrite(xx,&x)); 3170 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 3171 PetscFunctionReturn(0); 3172 } 3173 3174 /* 3175 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3176 */ 3177 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3178 { 3179 Mat B = *fact; 3180 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3181 IS isicol; 3182 const PetscInt *r,*ic; 3183 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3184 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3185 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3186 PetscInt nlnk,*lnk; 3187 PetscBT lnkbt; 3188 PetscBool row_identity,icol_identity; 3189 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3190 const PetscInt *ics; 3191 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3192 PetscReal dt =info->dt,shift=info->shiftamount; 3193 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3194 PetscBool missing; 3195 3196 PetscFunctionBegin; 3197 if (dt == PETSC_DEFAULT) dt = 0.005; 3198 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3199 3200 /* ------- symbolic factorization, can be reused ---------*/ 3201 PetscCall(MatMissingDiagonal(A,&missing,&i)); 3202 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 3203 adiag=a->diag; 3204 3205 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 3206 3207 /* bdiag is location of diagonal in factor */ 3208 PetscCall(PetscMalloc1(n+1,&bdiag)); /* becomes b->diag */ 3209 PetscCall(PetscMalloc1(n+1,&bdiag_rev)); /* temporary */ 3210 3211 /* allocate row pointers bi */ 3212 PetscCall(PetscMalloc1(2*n+2,&bi)); 3213 3214 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3215 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3216 nnz_max = ai[n]+2*n*dtcount+2; 3217 3218 PetscCall(PetscMalloc1(nnz_max+1,&bj)); 3219 PetscCall(PetscMalloc1(nnz_max+1,&ba)); 3220 3221 /* put together the new matrix */ 3222 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL)); 3223 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 3224 b = (Mat_SeqAIJ*)B->data; 3225 3226 b->free_a = PETSC_TRUE; 3227 b->free_ij = PETSC_TRUE; 3228 b->singlemalloc = PETSC_FALSE; 3229 3230 b->a = ba; 3231 b->j = bj; 3232 b->i = bi; 3233 b->diag = bdiag; 3234 b->ilen = NULL; 3235 b->imax = NULL; 3236 b->row = isrow; 3237 b->col = iscol; 3238 PetscCall(PetscObjectReference((PetscObject)isrow)); 3239 PetscCall(PetscObjectReference((PetscObject)iscol)); 3240 b->icol = isicol; 3241 3242 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 3243 PetscCall(PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)))); 3244 b->maxnz = nnz_max; 3245 3246 B->factortype = MAT_FACTOR_ILUDT; 3247 B->info.factor_mallocs = 0; 3248 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3249 /* ------- end of symbolic factorization ---------*/ 3250 3251 PetscCall(ISGetIndices(isrow,&r)); 3252 PetscCall(ISGetIndices(isicol,&ic)); 3253 ics = ic; 3254 3255 /* linked list for storing column indices of the active row */ 3256 nlnk = n + 1; 3257 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 3258 3259 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3260 PetscCall(PetscMalloc2(n,&im,n,&jtmp)); 3261 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3262 PetscCall(PetscMalloc2(n,&rtmp,n,&vtmp)); 3263 PetscCall(PetscArrayzero(rtmp,n)); 3264 3265 bi[0] = 0; 3266 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3267 bdiag_rev[n] = bdiag[0]; 3268 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3269 for (i=0; i<n; i++) { 3270 /* copy initial fill into linked list */ 3271 nzi = ai[r[i]+1] - ai[r[i]]; 3272 PetscCheck(nzi,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i); 3273 nzi_al = adiag[r[i]] - ai[r[i]]; 3274 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3275 ajtmp = aj + ai[r[i]]; 3276 PetscCall(PetscLLAddPerm(nzi,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 3277 3278 /* load in initial (unfactored row) */ 3279 aatmp = a->a + ai[r[i]]; 3280 for (j=0; j<nzi; j++) { 3281 rtmp[ics[*ajtmp++]] = *aatmp++; 3282 } 3283 3284 /* add pivot rows into linked list */ 3285 row = lnk[n]; 3286 while (row < i) { 3287 nzi_bl = bi[row+1] - bi[row] + 1; 3288 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3289 PetscCall(PetscLLAddSortedLU(bjtmp,row,&nlnk,lnk,lnkbt,i,nzi_bl,im)); 3290 nzi += nlnk; 3291 row = lnk[row]; 3292 } 3293 3294 /* copy data from lnk into jtmp, then initialize lnk */ 3295 PetscCall(PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt)); 3296 3297 /* numerical factorization */ 3298 bjtmp = jtmp; 3299 row = *bjtmp++; /* 1st pivot row */ 3300 while (row < i) { 3301 pc = rtmp + row; 3302 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3303 multiplier = (*pc) * (*pv); 3304 *pc = multiplier; 3305 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3306 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3307 pv = ba + bdiag[row+1] + 1; 3308 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3309 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3310 PetscCall(PetscLogFlops(1+2.0*nz)); 3311 } 3312 row = *bjtmp++; 3313 } 3314 3315 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3316 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3317 nzi_bl = 0; j = 0; 3318 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3319 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3320 nzi_bl++; j++; 3321 } 3322 nzi_bu = nzi - nzi_bl -1; 3323 while (j < nzi) { 3324 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3325 j++; 3326 } 3327 3328 bjtmp = bj + bi[i]; 3329 batmp = ba + bi[i]; 3330 /* apply level dropping rule to L part */ 3331 ncut = nzi_al + dtcount; 3332 if (ncut < nzi_bl) { 3333 PetscCall(PetscSortSplit(ncut,nzi_bl,vtmp,jtmp)); 3334 PetscCall(PetscSortIntWithScalarArray(ncut,jtmp,vtmp)); 3335 } else { 3336 ncut = nzi_bl; 3337 } 3338 for (j=0; j<ncut; j++) { 3339 bjtmp[j] = jtmp[j]; 3340 batmp[j] = vtmp[j]; 3341 } 3342 bi[i+1] = bi[i] + ncut; 3343 nzi = ncut + 1; 3344 3345 /* apply level dropping rule to U part */ 3346 ncut = nzi_au + dtcount; 3347 if (ncut < nzi_bu) { 3348 PetscCall(PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1)); 3349 PetscCall(PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1)); 3350 } else { 3351 ncut = nzi_bu; 3352 } 3353 nzi += ncut; 3354 3355 /* mark bdiagonal */ 3356 bdiag[i+1] = bdiag[i] - (ncut + 1); 3357 bdiag_rev[n-i-1] = bdiag[i+1]; 3358 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3359 bjtmp = bj + bdiag[i]; 3360 batmp = ba + bdiag[i]; 3361 *bjtmp = i; 3362 *batmp = diag_tmp; /* rtmp[i]; */ 3363 if (*batmp == 0.0) { 3364 *batmp = dt+shift; 3365 } 3366 *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */ 3367 3368 bjtmp = bj + bdiag[i+1]+1; 3369 batmp = ba + bdiag[i+1]+1; 3370 for (k=0; k<ncut; k++) { 3371 bjtmp[k] = jtmp[nzi_bl+1+k]; 3372 batmp[k] = vtmp[nzi_bl+1+k]; 3373 } 3374 3375 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3376 } /* for (i=0; i<n; i++) */ 3377 PetscCheck(bi[n] < bdiag[n],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT,bi[n],bdiag[n]); 3378 3379 PetscCall(ISRestoreIndices(isrow,&r)); 3380 PetscCall(ISRestoreIndices(isicol,&ic)); 3381 3382 PetscCall(PetscLLDestroy(lnk,lnkbt)); 3383 PetscCall(PetscFree2(im,jtmp)); 3384 PetscCall(PetscFree2(rtmp,vtmp)); 3385 PetscCall(PetscFree(bdiag_rev)); 3386 3387 PetscCall(PetscLogFlops(B->cmap->n)); 3388 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3389 3390 PetscCall(ISIdentity(isrow,&row_identity)); 3391 PetscCall(ISIdentity(isicol,&icol_identity)); 3392 if (row_identity && icol_identity) { 3393 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3394 } else { 3395 B->ops->solve = MatSolve_SeqAIJ; 3396 } 3397 3398 B->ops->solveadd = NULL; 3399 B->ops->solvetranspose = NULL; 3400 B->ops->solvetransposeadd = NULL; 3401 B->ops->matsolve = NULL; 3402 B->assembled = PETSC_TRUE; 3403 B->preallocated = PETSC_TRUE; 3404 PetscFunctionReturn(0); 3405 } 3406 3407 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3408 /* 3409 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3410 */ 3411 3412 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3413 { 3414 PetscFunctionBegin; 3415 PetscCall(MatILUDTFactor_SeqAIJ(A,row,col,info,&fact)); 3416 PetscFunctionReturn(0); 3417 } 3418 3419 /* 3420 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3421 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3422 */ 3423 /* 3424 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3425 */ 3426 3427 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3428 { 3429 Mat C =fact; 3430 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3431 IS isrow = b->row,isicol = b->icol; 3432 const PetscInt *r,*ic,*ics; 3433 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3434 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3435 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3436 PetscReal dt=info->dt,shift=info->shiftamount; 3437 PetscBool row_identity, col_identity; 3438 3439 PetscFunctionBegin; 3440 PetscCall(ISGetIndices(isrow,&r)); 3441 PetscCall(ISGetIndices(isicol,&ic)); 3442 PetscCall(PetscMalloc1(n+1,&rtmp)); 3443 ics = ic; 3444 3445 for (i=0; i<n; i++) { 3446 /* initialize rtmp array */ 3447 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3448 bjtmp = bj + bi[i]; 3449 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3450 rtmp[i] = 0.0; 3451 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3452 bjtmp = bj + bdiag[i+1] + 1; 3453 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3454 3455 /* load in initial unfactored row of A */ 3456 nz = ai[r[i]+1] - ai[r[i]]; 3457 ajtmp = aj + ai[r[i]]; 3458 v = aa + ai[r[i]]; 3459 for (j=0; j<nz; j++) { 3460 rtmp[ics[*ajtmp++]] = v[j]; 3461 } 3462 3463 /* numerical factorization */ 3464 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3465 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3466 k = 0; 3467 while (k < nzl) { 3468 row = *bjtmp++; 3469 pc = rtmp + row; 3470 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3471 multiplier = (*pc) * (*pv); 3472 *pc = multiplier; 3473 if (PetscAbsScalar(multiplier) > dt) { 3474 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3475 pv = b->a + bdiag[row+1] + 1; 3476 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3477 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3478 PetscCall(PetscLogFlops(1+2.0*nz)); 3479 } 3480 k++; 3481 } 3482 3483 /* finished row so stick it into b->a */ 3484 /* L-part */ 3485 pv = b->a + bi[i]; 3486 pj = bj + bi[i]; 3487 nzl = bi[i+1] - bi[i]; 3488 for (j=0; j<nzl; j++) { 3489 pv[j] = rtmp[pj[j]]; 3490 } 3491 3492 /* diagonal: invert diagonal entries for simpler triangular solves */ 3493 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3494 b->a[bdiag[i]] = 1.0/rtmp[i]; 3495 3496 /* U-part */ 3497 pv = b->a + bdiag[i+1] + 1; 3498 pj = bj + bdiag[i+1] + 1; 3499 nzu = bdiag[i] - bdiag[i+1] - 1; 3500 for (j=0; j<nzu; j++) { 3501 pv[j] = rtmp[pj[j]]; 3502 } 3503 } 3504 3505 PetscCall(PetscFree(rtmp)); 3506 PetscCall(ISRestoreIndices(isicol,&ic)); 3507 PetscCall(ISRestoreIndices(isrow,&r)); 3508 3509 PetscCall(ISIdentity(isrow,&row_identity)); 3510 PetscCall(ISIdentity(isicol,&col_identity)); 3511 if (row_identity && col_identity) { 3512 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3513 } else { 3514 C->ops->solve = MatSolve_SeqAIJ; 3515 } 3516 C->ops->solveadd = NULL; 3517 C->ops->solvetranspose = NULL; 3518 C->ops->solvetransposeadd = NULL; 3519 C->ops->matsolve = NULL; 3520 C->assembled = PETSC_TRUE; 3521 C->preallocated = PETSC_TRUE; 3522 3523 PetscCall(PetscLogFlops(C->cmap->n)); 3524 PetscFunctionReturn(0); 3525 } 3526