1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 15 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best,past = 0,future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i=0; i<n; i++) { 25 future = 0.0; 26 for (j=ai[i]; j<ai[i+1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past/future > best) { 32 best = past/future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n,&done)); 38 PetscCall(PetscArrayzero(done,n)); 39 PetscCall(PetscMalloc1(n,&order)); 40 order[0] = current; 41 for (i=0; i<n-1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j=ai[current]; j<ai[current+1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k=ai[jj]; k<ai[jj+1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past/future > best) { 57 best = past/future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k=0; k<n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j=ai[k]; j<ai[k+1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past/future > best) { 74 best = past/future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent,PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 80 current = newcurrent; 81 order[i+1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(0); 89 } 90 91 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type) 92 { 93 PetscFunctionBegin; 94 *type = MATSOLVERPETSC; 95 PetscFunctionReturn(0); 96 } 97 98 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 99 { 100 PetscInt n = A->rmap->n; 101 102 PetscFunctionBegin; 103 #if defined(PETSC_USE_COMPLEX) 104 PetscCheck(!A->hermitian || A->symmetric || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC),PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported"); 105 #endif 106 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B)); 107 PetscCall(MatSetSizes(*B,n,n,n,n)); 108 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 109 PetscCall(MatSetType(*B,MATSEQAIJ)); 110 111 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 112 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 113 114 PetscCall(MatSetBlockSizesFromMats(*B,A,A)); 115 PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU])); 116 PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU])); 117 PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 118 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 119 PetscCall(MatSetType(*B,MATSEQSBAIJ)); 120 PetscCall(MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL)); 121 122 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 123 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 124 PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 125 PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC])); 126 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 127 (*B)->factortype = ftype; 128 129 PetscCall(PetscFree((*B)->solvertype)); 130 PetscCall(PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype)); 131 (*B)->canuseordering = PETSC_TRUE; 132 PetscCall(PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc)); 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 139 IS isicol; 140 const PetscInt *r,*ic; 141 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 142 PetscInt *bi,*bj,*ajtmp; 143 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 144 PetscReal f; 145 PetscInt nlnk,*lnk,k,**bi_ptr; 146 PetscFreeSpaceList free_space=NULL,current_space=NULL; 147 PetscBT lnkbt; 148 PetscBool missing; 149 150 PetscFunctionBegin; 151 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 152 PetscCall(MatMissingDiagonal(A,&missing,&i)); 153 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 154 155 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 156 PetscCall(ISGetIndices(isrow,&r)); 157 PetscCall(ISGetIndices(isicol,&ic)); 158 159 /* get new row pointers */ 160 PetscCall(PetscMalloc1(n+1,&bi)); 161 bi[0] = 0; 162 163 /* bdiag is location of diagonal in factor */ 164 PetscCall(PetscMalloc1(n+1,&bdiag)); 165 bdiag[0] = 0; 166 167 /* linked list for storing column indices of the active row */ 168 nlnk = n + 1; 169 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 170 171 PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 172 173 /* initial FreeSpace size is f*(ai[n]+1) */ 174 f = info->fill; 175 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 176 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 177 current_space = free_space; 178 179 for (i=0; i<n; i++) { 180 /* copy previous fill into linked list */ 181 nzi = 0; 182 nnz = ai[r[i]+1] - ai[r[i]]; 183 ajtmp = aj + ai[r[i]]; 184 PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 185 nzi += nlnk; 186 187 /* add pivot rows into linked list */ 188 row = lnk[n]; 189 while (row < i) { 190 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 191 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 192 PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 193 nzi += nlnk; 194 row = lnk[row]; 195 } 196 bi[i+1] = bi[i] + nzi; 197 im[i] = nzi; 198 199 /* mark bdiag */ 200 nzbd = 0; 201 nnz = nzi; 202 k = lnk[n]; 203 while (nnz-- && k < i) { 204 nzbd++; 205 k = lnk[k]; 206 } 207 bdiag[i] = bi[i] + nzbd; 208 209 /* if free space is not available, make more free space */ 210 if (current_space->local_remaining<nzi) { 211 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 212 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 213 reallocs++; 214 } 215 216 /* copy data into free space, then initialize lnk */ 217 PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 218 219 bi_ptr[i] = current_space->array; 220 current_space->array += nzi; 221 current_space->local_used += nzi; 222 current_space->local_remaining -= nzi; 223 } 224 #if defined(PETSC_USE_INFO) 225 if (ai[n] != 0) { 226 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 227 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 228 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 229 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 230 PetscCall(PetscInfo(A,"for best performance.\n")); 231 } else { 232 PetscCall(PetscInfo(A,"Empty matrix\n")); 233 } 234 #endif 235 236 PetscCall(ISRestoreIndices(isrow,&r)); 237 PetscCall(ISRestoreIndices(isicol,&ic)); 238 239 /* destroy list of free space and other temporary array(s) */ 240 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 241 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 242 PetscCall(PetscLLDestroy(lnk,lnkbt)); 243 PetscCall(PetscFree2(bi_ptr,im)); 244 245 /* put together the new matrix */ 246 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL)); 247 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 248 b = (Mat_SeqAIJ*)(B)->data; 249 250 b->free_a = PETSC_TRUE; 251 b->free_ij = PETSC_TRUE; 252 b->singlemalloc = PETSC_FALSE; 253 254 PetscCall(PetscMalloc1(bi[n]+1,&b->a)); 255 b->j = bj; 256 b->i = bi; 257 b->diag = bdiag; 258 b->ilen = NULL; 259 b->imax = NULL; 260 b->row = isrow; 261 b->col = iscol; 262 PetscCall(PetscObjectReference((PetscObject)isrow)); 263 PetscCall(PetscObjectReference((PetscObject)iscol)); 264 b->icol = isicol; 265 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 266 267 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 268 PetscCall(PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)))); 269 b->maxnz = b->nz = bi[n]; 270 271 (B)->factortype = MAT_FACTOR_LU; 272 (B)->info.factor_mallocs = reallocs; 273 (B)->info.fill_ratio_given = f; 274 275 if (ai[n]) { 276 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 277 } else { 278 (B)->info.fill_ratio_needed = 0.0; 279 } 280 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 281 if (a->inode.size) { 282 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 283 } 284 PetscFunctionReturn(0); 285 } 286 287 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 288 { 289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 290 IS isicol; 291 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 292 PetscInt i,n=A->rmap->n; 293 PetscInt *bi,*bj; 294 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295 PetscReal f; 296 PetscInt nlnk,*lnk,k,**bi_ptr; 297 PetscFreeSpaceList free_space=NULL,current_space=NULL; 298 PetscBT lnkbt; 299 PetscBool missing; 300 301 PetscFunctionBegin; 302 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 303 PetscCall(MatMissingDiagonal(A,&missing,&i)); 304 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 305 306 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 307 PetscCall(ISGetIndices(isrow,&r)); 308 PetscCall(ISGetIndices(isicol,&ic)); 309 310 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 311 PetscCall(PetscMalloc1(n+1,&bi)); 312 PetscCall(PetscMalloc1(n+1,&bdiag)); 313 bi[0] = bdiag[0] = 0; 314 315 /* linked list for storing column indices of the active row */ 316 nlnk = n + 1; 317 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 318 319 PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 320 321 /* initial FreeSpace size is f*(ai[n]+1) */ 322 f = info->fill; 323 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 324 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 325 current_space = free_space; 326 327 for (i=0; i<n; i++) { 328 /* copy previous fill into linked list */ 329 nzi = 0; 330 nnz = ai[r[i]+1] - ai[r[i]]; 331 ajtmp = aj + ai[r[i]]; 332 PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 333 nzi += nlnk; 334 335 /* add pivot rows into linked list */ 336 row = lnk[n]; 337 while (row < i) { 338 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 339 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 340 PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 341 nzi += nlnk; 342 row = lnk[row]; 343 } 344 bi[i+1] = bi[i] + nzi; 345 im[i] = nzi; 346 347 /* mark bdiag */ 348 nzbd = 0; 349 nnz = nzi; 350 k = lnk[n]; 351 while (nnz-- && k < i) { 352 nzbd++; 353 k = lnk[k]; 354 } 355 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 356 357 /* if free space is not available, make more free space */ 358 if (current_space->local_remaining<nzi) { 359 /* estimated additional space needed */ 360 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 361 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 362 reallocs++; 363 } 364 365 /* copy data into free space, then initialize lnk */ 366 PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 367 368 bi_ptr[i] = current_space->array; 369 current_space->array += nzi; 370 current_space->local_used += nzi; 371 current_space->local_remaining -= nzi; 372 } 373 374 PetscCall(ISRestoreIndices(isrow,&r)); 375 PetscCall(ISRestoreIndices(isicol,&ic)); 376 377 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 378 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 379 PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag)); 380 PetscCall(PetscLLDestroy(lnk,lnkbt)); 381 PetscCall(PetscFree2(bi_ptr,im)); 382 383 /* put together the new matrix */ 384 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL)); 385 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 386 b = (Mat_SeqAIJ*)(B)->data; 387 388 b->free_a = PETSC_TRUE; 389 b->free_ij = PETSC_TRUE; 390 b->singlemalloc = PETSC_FALSE; 391 392 PetscCall(PetscMalloc1(bdiag[0]+1,&b->a)); 393 394 b->j = bj; 395 b->i = bi; 396 b->diag = bdiag; 397 b->ilen = NULL; 398 b->imax = NULL; 399 b->row = isrow; 400 b->col = iscol; 401 PetscCall(PetscObjectReference((PetscObject)isrow)); 402 PetscCall(PetscObjectReference((PetscObject)iscol)); 403 b->icol = isicol; 404 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 405 406 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 407 PetscCall(PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)))); 408 b->maxnz = b->nz = bdiag[0]+1; 409 410 B->factortype = MAT_FACTOR_LU; 411 B->info.factor_mallocs = reallocs; 412 B->info.fill_ratio_given = f; 413 414 if (ai[n]) { 415 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 416 } else { 417 B->info.fill_ratio_needed = 0.0; 418 } 419 #if defined(PETSC_USE_INFO) 420 if (ai[n] != 0) { 421 PetscReal af = B->info.fill_ratio_needed; 422 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 423 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 424 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 425 PetscCall(PetscInfo(A,"for best performance.\n")); 426 } else { 427 PetscCall(PetscInfo(A,"Empty matrix\n")); 428 } 429 #endif 430 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 431 if (a->inode.size) { 432 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 433 } 434 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 435 PetscFunctionReturn(0); 436 } 437 438 /* 439 Trouble in factorization, should we dump the original matrix? 440 */ 441 PetscErrorCode MatFactorDumpMatrix(Mat A) 442 { 443 PetscBool flg = PETSC_FALSE; 444 445 PetscFunctionBegin; 446 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL)); 447 if (flg) { 448 PetscViewer viewer; 449 char filename[PETSC_MAX_PATH_LEN]; 450 451 PetscCall(PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank)); 452 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer)); 453 PetscCall(MatView(A,viewer)); 454 PetscCall(PetscViewerDestroy(&viewer)); 455 } 456 PetscFunctionReturn(0); 457 } 458 459 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 460 { 461 Mat C =B; 462 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 463 IS isrow = b->row,isicol = b->icol; 464 const PetscInt *r,*ic,*ics; 465 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 466 PetscInt i,j,k,nz,nzL,row,*pj; 467 const PetscInt *ajtmp,*bjtmp; 468 MatScalar *rtmp,*pc,multiplier,*pv; 469 const MatScalar *aa=a->a,*v; 470 PetscBool row_identity,col_identity; 471 FactorShiftCtx sctx; 472 const PetscInt *ddiag; 473 PetscReal rs; 474 MatScalar d; 475 476 PetscFunctionBegin; 477 /* MatPivotSetUp(): initialize shift context sctx */ 478 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 479 480 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 481 ddiag = a->diag; 482 sctx.shift_top = info->zeropivot; 483 for (i=0; i<n; i++) { 484 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 485 d = (aa)[ddiag[i]]; 486 rs = -PetscAbsScalar(d) - PetscRealPart(d); 487 v = aa+ai[i]; 488 nz = ai[i+1] - ai[i]; 489 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 490 if (rs>sctx.shift_top) sctx.shift_top = rs; 491 } 492 sctx.shift_top *= 1.1; 493 sctx.nshift_max = 5; 494 sctx.shift_lo = 0.; 495 sctx.shift_hi = 1.; 496 } 497 498 PetscCall(ISGetIndices(isrow,&r)); 499 PetscCall(ISGetIndices(isicol,&ic)); 500 PetscCall(PetscMalloc1(n+1,&rtmp)); 501 ics = ic; 502 503 do { 504 sctx.newshift = PETSC_FALSE; 505 for (i=0; i<n; i++) { 506 /* zero rtmp */ 507 /* L part */ 508 nz = bi[i+1] - bi[i]; 509 bjtmp = bj + bi[i]; 510 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 511 512 /* U part */ 513 nz = bdiag[i]-bdiag[i+1]; 514 bjtmp = bj + bdiag[i+1]+1; 515 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 516 517 /* load in initial (unfactored row) */ 518 nz = ai[r[i]+1] - ai[r[i]]; 519 ajtmp = aj + ai[r[i]]; 520 v = aa + ai[r[i]]; 521 for (j=0; j<nz; j++) { 522 rtmp[ics[ajtmp[j]]] = v[j]; 523 } 524 /* ZeropivotApply() */ 525 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 526 527 /* elimination */ 528 bjtmp = bj + bi[i]; 529 row = *bjtmp++; 530 nzL = bi[i+1] - bi[i]; 531 for (k=0; k < nzL; k++) { 532 pc = rtmp + row; 533 if (*pc != 0.0) { 534 pv = b->a + bdiag[row]; 535 multiplier = *pc * (*pv); 536 *pc = multiplier; 537 538 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 539 pv = b->a + bdiag[row+1]+1; 540 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 541 542 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 543 PetscCall(PetscLogFlops(1+2.0*nz)); 544 } 545 row = *bjtmp++; 546 } 547 548 /* finished row so stick it into b->a */ 549 rs = 0.0; 550 /* L part */ 551 pv = b->a + bi[i]; 552 pj = b->j + bi[i]; 553 nz = bi[i+1] - bi[i]; 554 for (j=0; j<nz; j++) { 555 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 556 } 557 558 /* U part */ 559 pv = b->a + bdiag[i+1]+1; 560 pj = b->j + bdiag[i+1]+1; 561 nz = bdiag[i] - bdiag[i+1]-1; 562 for (j=0; j<nz; j++) { 563 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 564 } 565 566 sctx.rs = rs; 567 sctx.pv = rtmp[i]; 568 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 569 if (sctx.newshift) break; /* break for-loop */ 570 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 571 572 /* Mark diagonal and invert diagonal for simpler triangular solves */ 573 pv = b->a + bdiag[i]; 574 *pv = 1.0/rtmp[i]; 575 576 } /* endof for (i=0; i<n; i++) { */ 577 578 /* MatPivotRefine() */ 579 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 580 /* 581 * if no shift in this attempt & shifting & started shifting & can refine, 582 * then try lower shift 583 */ 584 sctx.shift_hi = sctx.shift_fraction; 585 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 586 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 587 sctx.newshift = PETSC_TRUE; 588 sctx.nshift++; 589 } 590 } while (sctx.newshift); 591 592 PetscCall(PetscFree(rtmp)); 593 PetscCall(ISRestoreIndices(isicol,&ic)); 594 PetscCall(ISRestoreIndices(isrow,&r)); 595 596 PetscCall(ISIdentity(isrow,&row_identity)); 597 PetscCall(ISIdentity(isicol,&col_identity)); 598 if (b->inode.size) { 599 C->ops->solve = MatSolve_SeqAIJ_Inode; 600 } else if (row_identity && col_identity) { 601 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 602 } else { 603 C->ops->solve = MatSolve_SeqAIJ; 604 } 605 C->ops->solveadd = MatSolveAdd_SeqAIJ; 606 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 607 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 608 C->ops->matsolve = MatMatSolve_SeqAIJ; 609 C->assembled = PETSC_TRUE; 610 C->preallocated = PETSC_TRUE; 611 612 PetscCall(PetscLogFlops(C->cmap->n)); 613 614 /* MatShiftView(A,info,&sctx) */ 615 if (sctx.nshift) { 616 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 617 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 618 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 619 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 620 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 621 PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount)); 622 } 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 628 { 629 Mat C =B; 630 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 631 IS isrow = b->row,isicol = b->icol; 632 const PetscInt *r,*ic,*ics; 633 PetscInt nz,row,i,j,n=A->rmap->n,diag; 634 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 635 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 636 MatScalar *pv,*rtmp,*pc,multiplier,d; 637 const MatScalar *v,*aa=a->a; 638 PetscReal rs=0.0; 639 FactorShiftCtx sctx; 640 const PetscInt *ddiag; 641 PetscBool row_identity, col_identity; 642 643 PetscFunctionBegin; 644 /* MatPivotSetUp(): initialize shift context sctx */ 645 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 646 647 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 648 ddiag = a->diag; 649 sctx.shift_top = info->zeropivot; 650 for (i=0; i<n; i++) { 651 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 652 d = (aa)[ddiag[i]]; 653 rs = -PetscAbsScalar(d) - PetscRealPart(d); 654 v = aa+ai[i]; 655 nz = ai[i+1] - ai[i]; 656 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 657 if (rs>sctx.shift_top) sctx.shift_top = rs; 658 } 659 sctx.shift_top *= 1.1; 660 sctx.nshift_max = 5; 661 sctx.shift_lo = 0.; 662 sctx.shift_hi = 1.; 663 } 664 665 PetscCall(ISGetIndices(isrow,&r)); 666 PetscCall(ISGetIndices(isicol,&ic)); 667 PetscCall(PetscMalloc1(n+1,&rtmp)); 668 ics = ic; 669 670 do { 671 sctx.newshift = PETSC_FALSE; 672 for (i=0; i<n; i++) { 673 nz = bi[i+1] - bi[i]; 674 bjtmp = bj + bi[i]; 675 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 676 677 /* load in initial (unfactored row) */ 678 nz = ai[r[i]+1] - ai[r[i]]; 679 ajtmp = aj + ai[r[i]]; 680 v = aa + ai[r[i]]; 681 for (j=0; j<nz; j++) { 682 rtmp[ics[ajtmp[j]]] = v[j]; 683 } 684 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 685 686 row = *bjtmp++; 687 while (row < i) { 688 pc = rtmp + row; 689 if (*pc != 0.0) { 690 pv = b->a + diag_offset[row]; 691 pj = b->j + diag_offset[row] + 1; 692 multiplier = *pc / *pv++; 693 *pc = multiplier; 694 nz = bi[row+1] - diag_offset[row] - 1; 695 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 696 PetscCall(PetscLogFlops(1+2.0*nz)); 697 } 698 row = *bjtmp++; 699 } 700 /* finished row so stick it into b->a */ 701 pv = b->a + bi[i]; 702 pj = b->j + bi[i]; 703 nz = bi[i+1] - bi[i]; 704 diag = diag_offset[i] - bi[i]; 705 rs = 0.0; 706 for (j=0; j<nz; j++) { 707 pv[j] = rtmp[pj[j]]; 708 rs += PetscAbsScalar(pv[j]); 709 } 710 rs -= PetscAbsScalar(pv[diag]); 711 712 sctx.rs = rs; 713 sctx.pv = pv[diag]; 714 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 715 if (sctx.newshift) break; 716 pv[diag] = sctx.pv; 717 } 718 719 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 720 /* 721 * if no shift in this attempt & shifting & started shifting & can refine, 722 * then try lower shift 723 */ 724 sctx.shift_hi = sctx.shift_fraction; 725 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 726 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 727 sctx.newshift = PETSC_TRUE; 728 sctx.nshift++; 729 } 730 } while (sctx.newshift); 731 732 /* invert diagonal entries for simpler triangular solves */ 733 for (i=0; i<n; i++) { 734 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 735 } 736 PetscCall(PetscFree(rtmp)); 737 PetscCall(ISRestoreIndices(isicol,&ic)); 738 PetscCall(ISRestoreIndices(isrow,&r)); 739 740 PetscCall(ISIdentity(isrow,&row_identity)); 741 PetscCall(ISIdentity(isicol,&col_identity)); 742 if (row_identity && col_identity) { 743 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 744 } else { 745 C->ops->solve = MatSolve_SeqAIJ_inplace; 746 } 747 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 748 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 749 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 750 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 751 752 C->assembled = PETSC_TRUE; 753 C->preallocated = PETSC_TRUE; 754 755 PetscCall(PetscLogFlops(C->cmap->n)); 756 if (sctx.nshift) { 757 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 758 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 759 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 760 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 761 } 762 } 763 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 764 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 765 766 PetscCall(MatSeqAIJCheckInode(C)); 767 PetscFunctionReturn(0); 768 } 769 770 /* 771 This routine implements inplace ILU(0) with row or/and column permutations. 772 Input: 773 A - original matrix 774 Output; 775 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 776 a->j (col index) is permuted by the inverse of colperm, then sorted 777 a->a reordered accordingly with a->j 778 a->diag (ptr to diagonal elements) is updated. 779 */ 780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 781 { 782 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 783 IS isrow = a->row,isicol = a->icol; 784 const PetscInt *r,*ic,*ics; 785 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 786 PetscInt *ajtmp,nz,row; 787 PetscInt *diag = a->diag,nbdiag,*pj; 788 PetscScalar *rtmp,*pc,multiplier,d; 789 MatScalar *pv,*v; 790 PetscReal rs; 791 FactorShiftCtx sctx; 792 const MatScalar *aa=a->a,*vtmp; 793 794 PetscFunctionBegin; 795 PetscCheck(A == B,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 796 797 /* MatPivotSetUp(): initialize shift context sctx */ 798 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 799 800 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 801 const PetscInt *ddiag = a->diag; 802 sctx.shift_top = info->zeropivot; 803 for (i=0; i<n; i++) { 804 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 805 d = (aa)[ddiag[i]]; 806 rs = -PetscAbsScalar(d) - PetscRealPart(d); 807 vtmp = aa+ai[i]; 808 nz = ai[i+1] - ai[i]; 809 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 810 if (rs>sctx.shift_top) sctx.shift_top = rs; 811 } 812 sctx.shift_top *= 1.1; 813 sctx.nshift_max = 5; 814 sctx.shift_lo = 0.; 815 sctx.shift_hi = 1.; 816 } 817 818 PetscCall(ISGetIndices(isrow,&r)); 819 PetscCall(ISGetIndices(isicol,&ic)); 820 PetscCall(PetscMalloc1(n+1,&rtmp)); 821 PetscCall(PetscArrayzero(rtmp,n+1)); 822 ics = ic; 823 824 #if defined(MV) 825 sctx.shift_top = 0.; 826 sctx.nshift_max = 0; 827 sctx.shift_lo = 0.; 828 sctx.shift_hi = 0.; 829 sctx.shift_fraction = 0.; 830 831 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 832 sctx.shift_top = 0.; 833 for (i=0; i<n; i++) { 834 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 835 d = (a->a)[diag[i]]; 836 rs = -PetscAbsScalar(d) - PetscRealPart(d); 837 v = a->a+ai[i]; 838 nz = ai[i+1] - ai[i]; 839 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 840 if (rs>sctx.shift_top) sctx.shift_top = rs; 841 } 842 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 843 sctx.shift_top *= 1.1; 844 sctx.nshift_max = 5; 845 sctx.shift_lo = 0.; 846 sctx.shift_hi = 1.; 847 } 848 849 sctx.shift_amount = 0.; 850 sctx.nshift = 0; 851 #endif 852 853 do { 854 sctx.newshift = PETSC_FALSE; 855 for (i=0; i<n; i++) { 856 /* load in initial unfactored row */ 857 nz = ai[r[i]+1] - ai[r[i]]; 858 ajtmp = aj + ai[r[i]]; 859 v = a->a + ai[r[i]]; 860 /* sort permuted ajtmp and values v accordingly */ 861 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 862 PetscCall(PetscSortIntWithScalarArray(nz,ajtmp,v)); 863 864 diag[r[i]] = ai[r[i]]; 865 for (j=0; j<nz; j++) { 866 rtmp[ajtmp[j]] = v[j]; 867 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 868 } 869 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 870 871 row = *ajtmp++; 872 while (row < i) { 873 pc = rtmp + row; 874 if (*pc != 0.0) { 875 pv = a->a + diag[r[row]]; 876 pj = aj + diag[r[row]] + 1; 877 878 multiplier = *pc / *pv++; 879 *pc = multiplier; 880 nz = ai[r[row]+1] - diag[r[row]] - 1; 881 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 882 PetscCall(PetscLogFlops(1+2.0*nz)); 883 } 884 row = *ajtmp++; 885 } 886 /* finished row so overwrite it onto a->a */ 887 pv = a->a + ai[r[i]]; 888 pj = aj + ai[r[i]]; 889 nz = ai[r[i]+1] - ai[r[i]]; 890 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 891 892 rs = 0.0; 893 for (j=0; j<nz; j++) { 894 pv[j] = rtmp[pj[j]]; 895 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 896 } 897 898 sctx.rs = rs; 899 sctx.pv = pv[nbdiag]; 900 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 901 if (sctx.newshift) break; 902 pv[nbdiag] = sctx.pv; 903 } 904 905 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 906 /* 907 * if no shift in this attempt & shifting & started shifting & can refine, 908 * then try lower shift 909 */ 910 sctx.shift_hi = sctx.shift_fraction; 911 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 912 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 913 sctx.newshift = PETSC_TRUE; 914 sctx.nshift++; 915 } 916 } while (sctx.newshift); 917 918 /* invert diagonal entries for simpler triangular solves */ 919 for (i=0; i<n; i++) { 920 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 921 } 922 923 PetscCall(PetscFree(rtmp)); 924 PetscCall(ISRestoreIndices(isicol,&ic)); 925 PetscCall(ISRestoreIndices(isrow,&r)); 926 927 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 928 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 929 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 930 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 931 932 A->assembled = PETSC_TRUE; 933 A->preallocated = PETSC_TRUE; 934 935 PetscCall(PetscLogFlops(A->cmap->n)); 936 if (sctx.nshift) { 937 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 938 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 939 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 940 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 941 } 942 } 943 PetscFunctionReturn(0); 944 } 945 946 /* ----------------------------------------------------------- */ 947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 948 { 949 Mat C; 950 951 PetscFunctionBegin; 952 PetscCall(MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C)); 953 PetscCall(MatLUFactorSymbolic(C,A,row,col,info)); 954 PetscCall(MatLUFactorNumeric(C,A,info)); 955 956 A->ops->solve = C->ops->solve; 957 A->ops->solvetranspose = C->ops->solvetranspose; 958 959 PetscCall(MatHeaderMerge(A,&C)); 960 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol)); 961 PetscFunctionReturn(0); 962 } 963 /* ----------------------------------------------------------- */ 964 965 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 966 { 967 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 968 IS iscol = a->col,isrow = a->row; 969 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 970 PetscInt nz; 971 const PetscInt *rout,*cout,*r,*c; 972 PetscScalar *x,*tmp,*tmps,sum; 973 const PetscScalar *b; 974 const MatScalar *aa = a->a,*v; 975 976 PetscFunctionBegin; 977 if (!n) PetscFunctionReturn(0); 978 979 PetscCall(VecGetArrayRead(bb,&b)); 980 PetscCall(VecGetArrayWrite(xx,&x)); 981 tmp = a->solve_work; 982 983 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 984 PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1); 985 986 /* forward solve the lower triangular */ 987 tmp[0] = b[*r++]; 988 tmps = tmp; 989 for (i=1; i<n; i++) { 990 v = aa + ai[i]; 991 vi = aj + ai[i]; 992 nz = a->diag[i] - ai[i]; 993 sum = b[*r++]; 994 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 995 tmp[i] = sum; 996 } 997 998 /* backward solve the upper triangular */ 999 for (i=n-1; i>=0; i--) { 1000 v = aa + a->diag[i] + 1; 1001 vi = aj + a->diag[i] + 1; 1002 nz = ai[i+1] - a->diag[i] - 1; 1003 sum = tmp[i]; 1004 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1005 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1006 } 1007 1008 PetscCall(ISRestoreIndices(isrow,&rout)); 1009 PetscCall(ISRestoreIndices(iscol,&cout)); 1010 PetscCall(VecRestoreArrayRead(bb,&b)); 1011 PetscCall(VecRestoreArrayWrite(xx,&x)); 1012 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1017 { 1018 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1019 IS iscol = a->col,isrow = a->row; 1020 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1021 PetscInt nz,neq,ldb,ldx; 1022 const PetscInt *rout,*cout,*r,*c; 1023 PetscScalar *x,*tmp = a->solve_work,*tmps,sum; 1024 const PetscScalar *b,*aa = a->a,*v; 1025 PetscBool isdense; 1026 1027 PetscFunctionBegin; 1028 if (!n) PetscFunctionReturn(0); 1029 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense)); 1030 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1031 if (X != B) { 1032 PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense)); 1033 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1034 } 1035 PetscCall(MatDenseGetArrayRead(B,&b)); 1036 PetscCall(MatDenseGetLDA(B,&ldb)); 1037 PetscCall(MatDenseGetArray(X,&x)); 1038 PetscCall(MatDenseGetLDA(X,&ldx)); 1039 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1040 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1041 for (neq=0; neq<B->cmap->n; neq++) { 1042 /* forward solve the lower triangular */ 1043 tmp[0] = b[r[0]]; 1044 tmps = tmp; 1045 for (i=1; i<n; i++) { 1046 v = aa + ai[i]; 1047 vi = aj + ai[i]; 1048 nz = a->diag[i] - ai[i]; 1049 sum = b[r[i]]; 1050 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1051 tmp[i] = sum; 1052 } 1053 /* backward solve the upper triangular */ 1054 for (i=n-1; i>=0; i--) { 1055 v = aa + a->diag[i] + 1; 1056 vi = aj + a->diag[i] + 1; 1057 nz = ai[i+1] - a->diag[i] - 1; 1058 sum = tmp[i]; 1059 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1060 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1061 } 1062 b += ldb; 1063 x += ldx; 1064 } 1065 PetscCall(ISRestoreIndices(isrow,&rout)); 1066 PetscCall(ISRestoreIndices(iscol,&cout)); 1067 PetscCall(MatDenseRestoreArrayRead(B,&b)); 1068 PetscCall(MatDenseRestoreArray(X,&x)); 1069 PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n))); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1074 { 1075 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1076 IS iscol = a->col,isrow = a->row; 1077 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1078 PetscInt nz,neq,ldb,ldx; 1079 const PetscInt *rout,*cout,*r,*c; 1080 PetscScalar *x,*tmp = a->solve_work,sum; 1081 const PetscScalar *b,*aa = a->a,*v; 1082 PetscBool isdense; 1083 1084 PetscFunctionBegin; 1085 if (!n) PetscFunctionReturn(0); 1086 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense)); 1087 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1088 if (X != B) { 1089 PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense)); 1090 PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1091 } 1092 PetscCall(MatDenseGetArrayRead(B,&b)); 1093 PetscCall(MatDenseGetLDA(B,&ldb)); 1094 PetscCall(MatDenseGetArray(X,&x)); 1095 PetscCall(MatDenseGetLDA(X,&ldx)); 1096 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1097 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1098 for (neq=0; neq<B->cmap->n; neq++) { 1099 /* forward solve the lower triangular */ 1100 tmp[0] = b[r[0]]; 1101 v = aa; 1102 vi = aj; 1103 for (i=1; i<n; i++) { 1104 nz = ai[i+1] - ai[i]; 1105 sum = b[r[i]]; 1106 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1107 tmp[i] = sum; 1108 v += nz; vi += nz; 1109 } 1110 /* backward solve the upper triangular */ 1111 for (i=n-1; i>=0; i--) { 1112 v = aa + adiag[i+1]+1; 1113 vi = aj + adiag[i+1]+1; 1114 nz = adiag[i]-adiag[i+1]-1; 1115 sum = tmp[i]; 1116 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1117 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1118 } 1119 b += ldb; 1120 x += ldx; 1121 } 1122 PetscCall(ISRestoreIndices(isrow,&rout)); 1123 PetscCall(ISRestoreIndices(iscol,&cout)); 1124 PetscCall(MatDenseRestoreArrayRead(B,&b)); 1125 PetscCall(MatDenseRestoreArray(X,&x)); 1126 PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n))); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1131 { 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1133 IS iscol = a->col,isrow = a->row; 1134 const PetscInt *r,*c,*rout,*cout; 1135 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1136 PetscInt nz,row; 1137 PetscScalar *x,*tmp,*tmps,sum; 1138 const PetscScalar *b; 1139 const MatScalar *aa = a->a,*v; 1140 1141 PetscFunctionBegin; 1142 if (!n) PetscFunctionReturn(0); 1143 1144 PetscCall(VecGetArrayRead(bb,&b)); 1145 PetscCall(VecGetArrayWrite(xx,&x)); 1146 tmp = a->solve_work; 1147 1148 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1149 PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1); 1150 1151 /* forward solve the lower triangular */ 1152 tmp[0] = b[*r++]; 1153 tmps = tmp; 1154 for (row=1; row<n; row++) { 1155 i = rout[row]; /* permuted row */ 1156 v = aa + ai[i]; 1157 vi = aj + ai[i]; 1158 nz = a->diag[i] - ai[i]; 1159 sum = b[*r++]; 1160 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1161 tmp[row] = sum; 1162 } 1163 1164 /* backward solve the upper triangular */ 1165 for (row=n-1; row>=0; row--) { 1166 i = rout[row]; /* permuted row */ 1167 v = aa + a->diag[i] + 1; 1168 vi = aj + a->diag[i] + 1; 1169 nz = ai[i+1] - a->diag[i] - 1; 1170 sum = tmp[row]; 1171 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1172 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1173 } 1174 1175 PetscCall(ISRestoreIndices(isrow,&rout)); 1176 PetscCall(ISRestoreIndices(iscol,&cout)); 1177 PetscCall(VecRestoreArrayRead(bb,&b)); 1178 PetscCall(VecRestoreArrayWrite(xx,&x)); 1179 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 /* ----------------------------------------------------------- */ 1184 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1185 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1186 { 1187 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1188 PetscInt n = A->rmap->n; 1189 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1190 PetscScalar *x; 1191 const PetscScalar *b; 1192 const MatScalar *aa = a->a; 1193 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1194 PetscInt adiag_i,i,nz,ai_i; 1195 const PetscInt *vi; 1196 const MatScalar *v; 1197 PetscScalar sum; 1198 #endif 1199 1200 PetscFunctionBegin; 1201 if (!n) PetscFunctionReturn(0); 1202 1203 PetscCall(VecGetArrayRead(bb,&b)); 1204 PetscCall(VecGetArrayWrite(xx,&x)); 1205 1206 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1207 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1208 #else 1209 /* forward solve the lower triangular */ 1210 x[0] = b[0]; 1211 for (i=1; i<n; i++) { 1212 ai_i = ai[i]; 1213 v = aa + ai_i; 1214 vi = aj + ai_i; 1215 nz = adiag[i] - ai_i; 1216 sum = b[i]; 1217 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1218 x[i] = sum; 1219 } 1220 1221 /* backward solve the upper triangular */ 1222 for (i=n-1; i>=0; i--) { 1223 adiag_i = adiag[i]; 1224 v = aa + adiag_i + 1; 1225 vi = aj + adiag_i + 1; 1226 nz = ai[i+1] - adiag_i - 1; 1227 sum = x[i]; 1228 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1229 x[i] = sum*aa[adiag_i]; 1230 } 1231 #endif 1232 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 1233 PetscCall(VecRestoreArrayRead(bb,&b)); 1234 PetscCall(VecRestoreArrayWrite(xx,&x)); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1239 { 1240 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1241 IS iscol = a->col,isrow = a->row; 1242 PetscInt i, n = A->rmap->n,j; 1243 PetscInt nz; 1244 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1245 PetscScalar *x,*tmp,sum; 1246 const PetscScalar *b; 1247 const MatScalar *aa = a->a,*v; 1248 1249 PetscFunctionBegin; 1250 if (yy != xx) PetscCall(VecCopy(yy,xx)); 1251 1252 PetscCall(VecGetArrayRead(bb,&b)); 1253 PetscCall(VecGetArray(xx,&x)); 1254 tmp = a->solve_work; 1255 1256 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1257 PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1); 1258 1259 /* forward solve the lower triangular */ 1260 tmp[0] = b[*r++]; 1261 for (i=1; i<n; i++) { 1262 v = aa + ai[i]; 1263 vi = aj + ai[i]; 1264 nz = a->diag[i] - ai[i]; 1265 sum = b[*r++]; 1266 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1267 tmp[i] = sum; 1268 } 1269 1270 /* backward solve the upper triangular */ 1271 for (i=n-1; i>=0; i--) { 1272 v = aa + a->diag[i] + 1; 1273 vi = aj + a->diag[i] + 1; 1274 nz = ai[i+1] - a->diag[i] - 1; 1275 sum = tmp[i]; 1276 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1277 tmp[i] = sum*aa[a->diag[i]]; 1278 x[*c--] += tmp[i]; 1279 } 1280 1281 PetscCall(ISRestoreIndices(isrow,&rout)); 1282 PetscCall(ISRestoreIndices(iscol,&cout)); 1283 PetscCall(VecRestoreArrayRead(bb,&b)); 1284 PetscCall(VecRestoreArray(xx,&x)); 1285 PetscCall(PetscLogFlops(2.0*a->nz)); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1290 { 1291 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1292 IS iscol = a->col,isrow = a->row; 1293 PetscInt i, n = A->rmap->n,j; 1294 PetscInt nz; 1295 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1296 PetscScalar *x,*tmp,sum; 1297 const PetscScalar *b; 1298 const MatScalar *aa = a->a,*v; 1299 1300 PetscFunctionBegin; 1301 if (yy != xx) PetscCall(VecCopy(yy,xx)); 1302 1303 PetscCall(VecGetArrayRead(bb,&b)); 1304 PetscCall(VecGetArray(xx,&x)); 1305 tmp = a->solve_work; 1306 1307 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1308 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1309 1310 /* forward solve the lower triangular */ 1311 tmp[0] = b[r[0]]; 1312 v = aa; 1313 vi = aj; 1314 for (i=1; i<n; i++) { 1315 nz = ai[i+1] - ai[i]; 1316 sum = b[r[i]]; 1317 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1318 tmp[i] = sum; 1319 v += nz; 1320 vi += nz; 1321 } 1322 1323 /* backward solve the upper triangular */ 1324 v = aa + adiag[n-1]; 1325 vi = aj + adiag[n-1]; 1326 for (i=n-1; i>=0; i--) { 1327 nz = adiag[i] - adiag[i+1] - 1; 1328 sum = tmp[i]; 1329 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1330 tmp[i] = sum*v[nz]; 1331 x[c[i]] += tmp[i]; 1332 v += nz+1; vi += nz+1; 1333 } 1334 1335 PetscCall(ISRestoreIndices(isrow,&rout)); 1336 PetscCall(ISRestoreIndices(iscol,&cout)); 1337 PetscCall(VecRestoreArrayRead(bb,&b)); 1338 PetscCall(VecRestoreArray(xx,&x)); 1339 PetscCall(PetscLogFlops(2.0*a->nz)); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1344 { 1345 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1346 IS iscol = a->col,isrow = a->row; 1347 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1348 PetscInt i,n = A->rmap->n,j; 1349 PetscInt nz; 1350 PetscScalar *x,*tmp,s1; 1351 const MatScalar *aa = a->a,*v; 1352 const PetscScalar *b; 1353 1354 PetscFunctionBegin; 1355 PetscCall(VecGetArrayRead(bb,&b)); 1356 PetscCall(VecGetArrayWrite(xx,&x)); 1357 tmp = a->solve_work; 1358 1359 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1360 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1361 1362 /* copy the b into temp work space according to permutation */ 1363 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1364 1365 /* forward solve the U^T */ 1366 for (i=0; i<n; i++) { 1367 v = aa + diag[i]; 1368 vi = aj + diag[i] + 1; 1369 nz = ai[i+1] - diag[i] - 1; 1370 s1 = tmp[i]; 1371 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1372 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1373 tmp[i] = s1; 1374 } 1375 1376 /* backward solve the L^T */ 1377 for (i=n-1; i>=0; i--) { 1378 v = aa + diag[i] - 1; 1379 vi = aj + diag[i] - 1; 1380 nz = diag[i] - ai[i]; 1381 s1 = tmp[i]; 1382 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1383 } 1384 1385 /* copy tmp into x according to permutation */ 1386 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1387 1388 PetscCall(ISRestoreIndices(isrow,&rout)); 1389 PetscCall(ISRestoreIndices(iscol,&cout)); 1390 PetscCall(VecRestoreArrayRead(bb,&b)); 1391 PetscCall(VecRestoreArrayWrite(xx,&x)); 1392 1393 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1398 { 1399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1400 IS iscol = a->col,isrow = a->row; 1401 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1402 PetscInt i,n = A->rmap->n,j; 1403 PetscInt nz; 1404 PetscScalar *x,*tmp,s1; 1405 const MatScalar *aa = a->a,*v; 1406 const PetscScalar *b; 1407 1408 PetscFunctionBegin; 1409 PetscCall(VecGetArrayRead(bb,&b)); 1410 PetscCall(VecGetArrayWrite(xx,&x)); 1411 tmp = a->solve_work; 1412 1413 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1414 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1415 1416 /* copy the b into temp work space according to permutation */ 1417 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1418 1419 /* forward solve the U^T */ 1420 for (i=0; i<n; i++) { 1421 v = aa + adiag[i+1] + 1; 1422 vi = aj + adiag[i+1] + 1; 1423 nz = adiag[i] - adiag[i+1] - 1; 1424 s1 = tmp[i]; 1425 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1426 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1427 tmp[i] = s1; 1428 } 1429 1430 /* backward solve the L^T */ 1431 for (i=n-1; i>=0; i--) { 1432 v = aa + ai[i]; 1433 vi = aj + ai[i]; 1434 nz = ai[i+1] - ai[i]; 1435 s1 = tmp[i]; 1436 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1437 } 1438 1439 /* copy tmp into x according to permutation */ 1440 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1441 1442 PetscCall(ISRestoreIndices(isrow,&rout)); 1443 PetscCall(ISRestoreIndices(iscol,&cout)); 1444 PetscCall(VecRestoreArrayRead(bb,&b)); 1445 PetscCall(VecRestoreArrayWrite(xx,&x)); 1446 1447 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1452 { 1453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1454 IS iscol = a->col,isrow = a->row; 1455 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1456 PetscInt i,n = A->rmap->n,j; 1457 PetscInt nz; 1458 PetscScalar *x,*tmp,s1; 1459 const MatScalar *aa = a->a,*v; 1460 const PetscScalar *b; 1461 1462 PetscFunctionBegin; 1463 if (zz != xx) PetscCall(VecCopy(zz,xx)); 1464 PetscCall(VecGetArrayRead(bb,&b)); 1465 PetscCall(VecGetArray(xx,&x)); 1466 tmp = a->solve_work; 1467 1468 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1469 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1470 1471 /* copy the b into temp work space according to permutation */ 1472 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1473 1474 /* forward solve the U^T */ 1475 for (i=0; i<n; i++) { 1476 v = aa + diag[i]; 1477 vi = aj + diag[i] + 1; 1478 nz = ai[i+1] - diag[i] - 1; 1479 s1 = tmp[i]; 1480 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1481 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1482 tmp[i] = s1; 1483 } 1484 1485 /* backward solve the L^T */ 1486 for (i=n-1; i>=0; i--) { 1487 v = aa + diag[i] - 1; 1488 vi = aj + diag[i] - 1; 1489 nz = diag[i] - ai[i]; 1490 s1 = tmp[i]; 1491 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1492 } 1493 1494 /* copy tmp into x according to permutation */ 1495 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1496 1497 PetscCall(ISRestoreIndices(isrow,&rout)); 1498 PetscCall(ISRestoreIndices(iscol,&cout)); 1499 PetscCall(VecRestoreArrayRead(bb,&b)); 1500 PetscCall(VecRestoreArray(xx,&x)); 1501 1502 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1507 { 1508 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1509 IS iscol = a->col,isrow = a->row; 1510 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1511 PetscInt i,n = A->rmap->n,j; 1512 PetscInt nz; 1513 PetscScalar *x,*tmp,s1; 1514 const MatScalar *aa = a->a,*v; 1515 const PetscScalar *b; 1516 1517 PetscFunctionBegin; 1518 if (zz != xx) PetscCall(VecCopy(zz,xx)); 1519 PetscCall(VecGetArrayRead(bb,&b)); 1520 PetscCall(VecGetArray(xx,&x)); 1521 tmp = a->solve_work; 1522 1523 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 1524 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 1525 1526 /* copy the b into temp work space according to permutation */ 1527 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1528 1529 /* forward solve the U^T */ 1530 for (i=0; i<n; i++) { 1531 v = aa + adiag[i+1] + 1; 1532 vi = aj + adiag[i+1] + 1; 1533 nz = adiag[i] - adiag[i+1] - 1; 1534 s1 = tmp[i]; 1535 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1536 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1537 tmp[i] = s1; 1538 } 1539 1540 /* backward solve the L^T */ 1541 for (i=n-1; i>=0; i--) { 1542 v = aa + ai[i]; 1543 vi = aj + ai[i]; 1544 nz = ai[i+1] - ai[i]; 1545 s1 = tmp[i]; 1546 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1547 } 1548 1549 /* copy tmp into x according to permutation */ 1550 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1551 1552 PetscCall(ISRestoreIndices(isrow,&rout)); 1553 PetscCall(ISRestoreIndices(iscol,&cout)); 1554 PetscCall(VecRestoreArrayRead(bb,&b)); 1555 PetscCall(VecRestoreArray(xx,&x)); 1556 1557 PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n)); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 /* ----------------------------------------------------------------*/ 1562 1563 /* 1564 ilu() under revised new data structure. 1565 Factored arrays bj and ba are stored as 1566 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1567 1568 bi=fact->i is an array of size n+1, in which 1569 bi+ 1570 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1571 bi[n]: points to L(n-1,n-1)+1 1572 1573 bdiag=fact->diag is an array of size n+1,in which 1574 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1575 bdiag[n]: points to entry of U(n-1,0)-1 1576 1577 U(i,:) contains bdiag[i] as its last entry, i.e., 1578 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1579 */ 1580 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1581 { 1582 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1583 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1584 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1585 IS isicol; 1586 1587 PetscFunctionBegin; 1588 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 1589 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE)); 1590 b = (Mat_SeqAIJ*)(fact)->data; 1591 1592 /* allocate matrix arrays for new data structure */ 1593 PetscCall(PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i)); 1594 PetscCall(PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt))); 1595 1596 b->singlemalloc = PETSC_TRUE; 1597 if (!b->diag) { 1598 PetscCall(PetscMalloc1(n+1,&b->diag)); 1599 PetscCall(PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt))); 1600 } 1601 bdiag = b->diag; 1602 1603 if (n > 0) { 1604 PetscCall(PetscArrayzero(b->a,ai[n])); 1605 } 1606 1607 /* set bi and bj with new data structure */ 1608 bi = b->i; 1609 bj = b->j; 1610 1611 /* L part */ 1612 bi[0] = 0; 1613 for (i=0; i<n; i++) { 1614 nz = adiag[i] - ai[i]; 1615 bi[i+1] = bi[i] + nz; 1616 aj = a->j + ai[i]; 1617 for (j=0; j<nz; j++) { 1618 /* *bj = aj[j]; bj++; */ 1619 bj[k++] = aj[j]; 1620 } 1621 } 1622 1623 /* U part */ 1624 bdiag[n] = bi[n]-1; 1625 for (i=n-1; i>=0; i--) { 1626 nz = ai[i+1] - adiag[i] - 1; 1627 aj = a->j + adiag[i] + 1; 1628 for (j=0; j<nz; j++) { 1629 /* *bj = aj[j]; bj++; */ 1630 bj[k++] = aj[j]; 1631 } 1632 /* diag[i] */ 1633 /* *bj = i; bj++; */ 1634 bj[k++] = i; 1635 bdiag[i] = bdiag[i+1] + nz + 1; 1636 } 1637 1638 fact->factortype = MAT_FACTOR_ILU; 1639 fact->info.factor_mallocs = 0; 1640 fact->info.fill_ratio_given = info->fill; 1641 fact->info.fill_ratio_needed = 1.0; 1642 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1643 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1644 1645 b = (Mat_SeqAIJ*)(fact)->data; 1646 b->row = isrow; 1647 b->col = iscol; 1648 b->icol = isicol; 1649 PetscCall(PetscMalloc1(fact->rmap->n+1,&b->solve_work)); 1650 PetscCall(PetscObjectReference((PetscObject)isrow)); 1651 PetscCall(PetscObjectReference((PetscObject)iscol)); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1656 { 1657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1658 IS isicol; 1659 const PetscInt *r,*ic; 1660 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1661 PetscInt *bi,*cols,nnz,*cols_lvl; 1662 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1663 PetscInt i,levels,diagonal_fill; 1664 PetscBool col_identity,row_identity,missing; 1665 PetscReal f; 1666 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1667 PetscBT lnkbt; 1668 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1669 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1670 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1671 1672 PetscFunctionBegin; 1673 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 1674 PetscCall(MatMissingDiagonal(A,&missing,&i)); 1675 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 1676 1677 levels = (PetscInt)info->levels; 1678 PetscCall(ISIdentity(isrow,&row_identity)); 1679 PetscCall(ISIdentity(iscol,&col_identity)); 1680 if (!levels && row_identity && col_identity) { 1681 /* special case: ilu(0) with natural ordering */ 1682 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info)); 1683 if (a->inode.size) { 1684 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 1690 PetscCall(ISGetIndices(isrow,&r)); 1691 PetscCall(ISGetIndices(isicol,&ic)); 1692 1693 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1694 PetscCall(PetscMalloc1(n+1,&bi)); 1695 PetscCall(PetscMalloc1(n+1,&bdiag)); 1696 bi[0] = bdiag[0] = 0; 1697 PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr)); 1698 1699 /* create a linked list for storing column indices of the active row */ 1700 nlnk = n + 1; 1701 PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt)); 1702 1703 /* initial FreeSpace size is f*(ai[n]+1) */ 1704 f = info->fill; 1705 diagonal_fill = (PetscInt)info->diagonal_fill; 1706 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 1707 current_space = free_space; 1708 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl)); 1709 current_space_lvl = free_space_lvl; 1710 for (i=0; i<n; i++) { 1711 nzi = 0; 1712 /* copy current row into linked list */ 1713 nnz = ai[r[i]+1] - ai[r[i]]; 1714 PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i); 1715 cols = aj + ai[r[i]]; 1716 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1717 PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt)); 1718 nzi += nlnk; 1719 1720 /* make sure diagonal entry is included */ 1721 if (diagonal_fill && lnk[i] == -1) { 1722 fm = n; 1723 while (lnk[fm] < i) fm = lnk[fm]; 1724 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1725 lnk[fm] = i; 1726 lnk_lvl[i] = 0; 1727 nzi++; dcount++; 1728 } 1729 1730 /* add pivot rows into the active row */ 1731 nzbd = 0; 1732 prow = lnk[n]; 1733 while (prow < i) { 1734 nnz = bdiag[prow]; 1735 cols = bj_ptr[prow] + nnz + 1; 1736 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1737 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1738 PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow)); 1739 nzi += nlnk; 1740 prow = lnk[prow]; 1741 nzbd++; 1742 } 1743 bdiag[i] = nzbd; 1744 bi[i+1] = bi[i] + nzi; 1745 /* if free space is not available, make more free space */ 1746 if (current_space->local_remaining<nzi) { 1747 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1748 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 1749 PetscCall(PetscFreeSpaceGet(nnz,¤t_space_lvl)); 1750 reallocs++; 1751 } 1752 1753 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1754 PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 1755 bj_ptr[i] = current_space->array; 1756 bjlvl_ptr[i] = current_space_lvl->array; 1757 1758 /* make sure the active row i has diagonal entry */ 1759 PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1760 1761 current_space->array += nzi; 1762 current_space->local_used += nzi; 1763 current_space->local_remaining -= nzi; 1764 current_space_lvl->array += nzi; 1765 current_space_lvl->local_used += nzi; 1766 current_space_lvl->local_remaining -= nzi; 1767 } 1768 1769 PetscCall(ISRestoreIndices(isrow,&r)); 1770 PetscCall(ISRestoreIndices(isicol,&ic)); 1771 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1772 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 1773 PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag)); 1774 1775 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 1776 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1777 PetscCall(PetscFree2(bj_ptr,bjlvl_ptr)); 1778 1779 #if defined(PETSC_USE_INFO) 1780 { 1781 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1782 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 1783 PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af)); 1784 PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af)); 1785 PetscCall(PetscInfo(A,"for best performance.\n")); 1786 if (diagonal_fill) { 1787 PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount)); 1788 } 1789 } 1790 #endif 1791 /* put together the new matrix */ 1792 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL)); 1793 PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol)); 1794 b = (Mat_SeqAIJ*)(fact)->data; 1795 1796 b->free_a = PETSC_TRUE; 1797 b->free_ij = PETSC_TRUE; 1798 b->singlemalloc = PETSC_FALSE; 1799 1800 PetscCall(PetscMalloc1(bdiag[0]+1,&b->a)); 1801 1802 b->j = bj; 1803 b->i = bi; 1804 b->diag = bdiag; 1805 b->ilen = NULL; 1806 b->imax = NULL; 1807 b->row = isrow; 1808 b->col = iscol; 1809 PetscCall(PetscObjectReference((PetscObject)isrow)); 1810 PetscCall(PetscObjectReference((PetscObject)iscol)); 1811 b->icol = isicol; 1812 1813 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 1814 /* In b structure: Free imax, ilen, old a, old j. 1815 Allocate bdiag, solve_work, new a, new j */ 1816 PetscCall(PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)))); 1817 b->maxnz = b->nz = bdiag[0]+1; 1818 1819 (fact)->info.factor_mallocs = reallocs; 1820 (fact)->info.fill_ratio_given = f; 1821 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1822 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1823 if (a->inode.size) { 1824 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1825 } 1826 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1831 { 1832 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1833 IS isicol; 1834 const PetscInt *r,*ic; 1835 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1836 PetscInt *bi,*cols,nnz,*cols_lvl; 1837 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1838 PetscInt i,levels,diagonal_fill; 1839 PetscBool col_identity,row_identity; 1840 PetscReal f; 1841 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1842 PetscBT lnkbt; 1843 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1844 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1845 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1846 PetscBool missing; 1847 1848 PetscFunctionBegin; 1849 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 1850 PetscCall(MatMissingDiagonal(A,&missing,&i)); 1851 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 1852 1853 f = info->fill; 1854 levels = (PetscInt)info->levels; 1855 diagonal_fill = (PetscInt)info->diagonal_fill; 1856 1857 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 1858 1859 PetscCall(ISIdentity(isrow,&row_identity)); 1860 PetscCall(ISIdentity(iscol,&col_identity)); 1861 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1862 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE)); 1863 1864 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1865 if (a->inode.size) { 1866 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1867 } 1868 fact->factortype = MAT_FACTOR_ILU; 1869 (fact)->info.factor_mallocs = 0; 1870 (fact)->info.fill_ratio_given = info->fill; 1871 (fact)->info.fill_ratio_needed = 1.0; 1872 1873 b = (Mat_SeqAIJ*)(fact)->data; 1874 b->row = isrow; 1875 b->col = iscol; 1876 b->icol = isicol; 1877 PetscCall(PetscMalloc1((fact)->rmap->n+1,&b->solve_work)); 1878 PetscCall(PetscObjectReference((PetscObject)isrow)); 1879 PetscCall(PetscObjectReference((PetscObject)iscol)); 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscCall(ISGetIndices(isrow,&r)); 1884 PetscCall(ISGetIndices(isicol,&ic)); 1885 1886 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1887 PetscCall(PetscMalloc1(n+1,&bi)); 1888 PetscCall(PetscMalloc1(n+1,&bdiag)); 1889 bi[0] = bdiag[0] = 0; 1890 1891 PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr)); 1892 1893 /* create a linked list for storing column indices of the active row */ 1894 nlnk = n + 1; 1895 PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt)); 1896 1897 /* initial FreeSpace size is f*(ai[n]+1) */ 1898 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 1899 current_space = free_space; 1900 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl)); 1901 current_space_lvl = free_space_lvl; 1902 1903 for (i=0; i<n; i++) { 1904 nzi = 0; 1905 /* copy current row into linked list */ 1906 nnz = ai[r[i]+1] - ai[r[i]]; 1907 PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i); 1908 cols = aj + ai[r[i]]; 1909 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1910 PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt)); 1911 nzi += nlnk; 1912 1913 /* make sure diagonal entry is included */ 1914 if (diagonal_fill && lnk[i] == -1) { 1915 fm = n; 1916 while (lnk[fm] < i) fm = lnk[fm]; 1917 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1918 lnk[fm] = i; 1919 lnk_lvl[i] = 0; 1920 nzi++; dcount++; 1921 } 1922 1923 /* add pivot rows into the active row */ 1924 nzbd = 0; 1925 prow = lnk[n]; 1926 while (prow < i) { 1927 nnz = bdiag[prow]; 1928 cols = bj_ptr[prow] + nnz + 1; 1929 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1930 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1931 PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow)); 1932 nzi += nlnk; 1933 prow = lnk[prow]; 1934 nzbd++; 1935 } 1936 bdiag[i] = nzbd; 1937 bi[i+1] = bi[i] + nzi; 1938 1939 /* if free space is not available, make more free space */ 1940 if (current_space->local_remaining<nzi) { 1941 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 1942 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 1943 PetscCall(PetscFreeSpaceGet(nnz,¤t_space_lvl)); 1944 reallocs++; 1945 } 1946 1947 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1948 PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 1949 bj_ptr[i] = current_space->array; 1950 bjlvl_ptr[i] = current_space_lvl->array; 1951 1952 /* make sure the active row i has diagonal entry */ 1953 PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1954 1955 current_space->array += nzi; 1956 current_space->local_used += nzi; 1957 current_space->local_remaining -= nzi; 1958 current_space_lvl->array += nzi; 1959 current_space_lvl->local_used += nzi; 1960 current_space_lvl->local_remaining -= nzi; 1961 } 1962 1963 PetscCall(ISRestoreIndices(isrow,&r)); 1964 PetscCall(ISRestoreIndices(isicol,&ic)); 1965 1966 /* destroy list of free space and other temporary arrays */ 1967 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 1968 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); /* copy free_space -> bj */ 1969 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 1970 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1971 PetscCall(PetscFree2(bj_ptr,bjlvl_ptr)); 1972 1973 #if defined(PETSC_USE_INFO) 1974 { 1975 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1976 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 1977 PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af)); 1978 PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af)); 1979 PetscCall(PetscInfo(A,"for best performance.\n")); 1980 if (diagonal_fill) { 1981 PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount)); 1982 } 1983 } 1984 #endif 1985 1986 /* put together the new matrix */ 1987 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL)); 1988 PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol)); 1989 b = (Mat_SeqAIJ*)(fact)->data; 1990 1991 b->free_a = PETSC_TRUE; 1992 b->free_ij = PETSC_TRUE; 1993 b->singlemalloc = PETSC_FALSE; 1994 1995 PetscCall(PetscMalloc1(bi[n],&b->a)); 1996 b->j = bj; 1997 b->i = bi; 1998 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1999 b->diag = bdiag; 2000 b->ilen = NULL; 2001 b->imax = NULL; 2002 b->row = isrow; 2003 b->col = iscol; 2004 PetscCall(PetscObjectReference((PetscObject)isrow)); 2005 PetscCall(PetscObjectReference((PetscObject)iscol)); 2006 b->icol = isicol; 2007 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 2008 /* In b structure: Free imax, ilen, old a, old j. 2009 Allocate bdiag, solve_work, new a, new j */ 2010 PetscCall(PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)))); 2011 b->maxnz = b->nz = bi[n]; 2012 2013 (fact)->info.factor_mallocs = reallocs; 2014 (fact)->info.fill_ratio_given = f; 2015 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2016 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2017 if (a->inode.size) { 2018 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2019 } 2020 PetscFunctionReturn(0); 2021 } 2022 2023 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2024 { 2025 Mat C = B; 2026 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2027 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2028 IS ip=b->row,iip = b->icol; 2029 const PetscInt *rip,*riip; 2030 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2031 PetscInt *ai=a->i,*aj=a->j; 2032 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2033 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2034 PetscBool perm_identity; 2035 FactorShiftCtx sctx; 2036 PetscReal rs; 2037 MatScalar d,*v; 2038 2039 PetscFunctionBegin; 2040 /* MatPivotSetUp(): initialize shift context sctx */ 2041 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 2042 2043 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2044 sctx.shift_top = info->zeropivot; 2045 for (i=0; i<mbs; i++) { 2046 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2047 d = (aa)[a->diag[i]]; 2048 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2049 v = aa+ai[i]; 2050 nz = ai[i+1] - ai[i]; 2051 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2052 if (rs>sctx.shift_top) sctx.shift_top = rs; 2053 } 2054 sctx.shift_top *= 1.1; 2055 sctx.nshift_max = 5; 2056 sctx.shift_lo = 0.; 2057 sctx.shift_hi = 1.; 2058 } 2059 2060 PetscCall(ISGetIndices(ip,&rip)); 2061 PetscCall(ISGetIndices(iip,&riip)); 2062 2063 /* allocate working arrays 2064 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2065 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2066 */ 2067 PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r)); 2068 2069 do { 2070 sctx.newshift = PETSC_FALSE; 2071 2072 for (i=0; i<mbs; i++) c2r[i] = mbs; 2073 if (mbs) il[0] = 0; 2074 2075 for (k = 0; k<mbs; k++) { 2076 /* zero rtmp */ 2077 nz = bi[k+1] - bi[k]; 2078 bjtmp = bj + bi[k]; 2079 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2080 2081 /* load in initial unfactored row */ 2082 bval = ba + bi[k]; 2083 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2084 for (j = jmin; j < jmax; j++) { 2085 col = riip[aj[j]]; 2086 if (col >= k) { /* only take upper triangular entry */ 2087 rtmp[col] = aa[j]; 2088 *bval++ = 0.0; /* for in-place factorization */ 2089 } 2090 } 2091 /* shift the diagonal of the matrix: ZeropivotApply() */ 2092 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2093 2094 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2095 dk = rtmp[k]; 2096 i = c2r[k]; /* first row to be added to k_th row */ 2097 2098 while (i < k) { 2099 nexti = c2r[i]; /* next row to be added to k_th row */ 2100 2101 /* compute multiplier, update diag(k) and U(i,k) */ 2102 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2103 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2104 dk += uikdi*ba[ili]; /* update diag[k] */ 2105 ba[ili] = uikdi; /* -U(i,k) */ 2106 2107 /* add multiple of row i to k-th row */ 2108 jmin = ili + 1; jmax = bi[i+1]; 2109 if (jmin < jmax) { 2110 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2111 /* update il and c2r for row i */ 2112 il[i] = jmin; 2113 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2114 } 2115 i = nexti; 2116 } 2117 2118 /* copy data into U(k,:) */ 2119 rs = 0.0; 2120 jmin = bi[k]; jmax = bi[k+1]-1; 2121 if (jmin < jmax) { 2122 for (j=jmin; j<jmax; j++) { 2123 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2124 } 2125 /* add the k-th row into il and c2r */ 2126 il[k] = jmin; 2127 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2128 } 2129 2130 /* MatPivotCheck() */ 2131 sctx.rs = rs; 2132 sctx.pv = dk; 2133 PetscCall(MatPivotCheck(B,A,info,&sctx,i)); 2134 if (sctx.newshift) break; 2135 dk = sctx.pv; 2136 2137 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2138 } 2139 } while (sctx.newshift); 2140 2141 PetscCall(PetscFree3(rtmp,il,c2r)); 2142 PetscCall(ISRestoreIndices(ip,&rip)); 2143 PetscCall(ISRestoreIndices(iip,&riip)); 2144 2145 PetscCall(ISIdentity(ip,&perm_identity)); 2146 if (perm_identity) { 2147 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2148 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2149 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2150 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2151 } else { 2152 B->ops->solve = MatSolve_SeqSBAIJ_1; 2153 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2154 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2155 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2156 } 2157 2158 C->assembled = PETSC_TRUE; 2159 C->preallocated = PETSC_TRUE; 2160 2161 PetscCall(PetscLogFlops(C->rmap->n)); 2162 2163 /* MatPivotView() */ 2164 if (sctx.nshift) { 2165 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2166 PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top)); 2167 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2168 PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 2169 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2170 PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount)); 2171 } 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2177 { 2178 Mat C = B; 2179 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2180 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2181 IS ip=b->row,iip = b->icol; 2182 const PetscInt *rip,*riip; 2183 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2184 PetscInt *ai=a->i,*aj=a->j; 2185 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2186 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2187 PetscBool perm_identity; 2188 FactorShiftCtx sctx; 2189 PetscReal rs; 2190 MatScalar d,*v; 2191 2192 PetscFunctionBegin; 2193 /* MatPivotSetUp(): initialize shift context sctx */ 2194 PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx))); 2195 2196 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2197 sctx.shift_top = info->zeropivot; 2198 for (i=0; i<mbs; i++) { 2199 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2200 d = (aa)[a->diag[i]]; 2201 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2202 v = aa+ai[i]; 2203 nz = ai[i+1] - ai[i]; 2204 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2205 if (rs>sctx.shift_top) sctx.shift_top = rs; 2206 } 2207 sctx.shift_top *= 1.1; 2208 sctx.nshift_max = 5; 2209 sctx.shift_lo = 0.; 2210 sctx.shift_hi = 1.; 2211 } 2212 2213 PetscCall(ISGetIndices(ip,&rip)); 2214 PetscCall(ISGetIndices(iip,&riip)); 2215 2216 /* initialization */ 2217 PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl)); 2218 2219 do { 2220 sctx.newshift = PETSC_FALSE; 2221 2222 for (i=0; i<mbs; i++) jl[i] = mbs; 2223 il[0] = 0; 2224 2225 for (k = 0; k<mbs; k++) { 2226 /* zero rtmp */ 2227 nz = bi[k+1] - bi[k]; 2228 bjtmp = bj + bi[k]; 2229 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2230 2231 bval = ba + bi[k]; 2232 /* initialize k-th row by the perm[k]-th row of A */ 2233 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2234 for (j = jmin; j < jmax; j++) { 2235 col = riip[aj[j]]; 2236 if (col >= k) { /* only take upper triangular entry */ 2237 rtmp[col] = aa[j]; 2238 *bval++ = 0.0; /* for in-place factorization */ 2239 } 2240 } 2241 /* shift the diagonal of the matrix */ 2242 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2243 2244 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2245 dk = rtmp[k]; 2246 i = jl[k]; /* first row to be added to k_th row */ 2247 2248 while (i < k) { 2249 nexti = jl[i]; /* next row to be added to k_th row */ 2250 2251 /* compute multiplier, update diag(k) and U(i,k) */ 2252 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2253 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2254 dk += uikdi*ba[ili]; 2255 ba[ili] = uikdi; /* -U(i,k) */ 2256 2257 /* add multiple of row i to k-th row */ 2258 jmin = ili + 1; jmax = bi[i+1]; 2259 if (jmin < jmax) { 2260 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2261 /* update il and jl for row i */ 2262 il[i] = jmin; 2263 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2264 } 2265 i = nexti; 2266 } 2267 2268 /* shift the diagonals when zero pivot is detected */ 2269 /* compute rs=sum of abs(off-diagonal) */ 2270 rs = 0.0; 2271 jmin = bi[k]+1; 2272 nz = bi[k+1] - jmin; 2273 bcol = bj + jmin; 2274 for (j=0; j<nz; j++) { 2275 rs += PetscAbsScalar(rtmp[bcol[j]]); 2276 } 2277 2278 sctx.rs = rs; 2279 sctx.pv = dk; 2280 PetscCall(MatPivotCheck(B,A,info,&sctx,k)); 2281 if (sctx.newshift) break; 2282 dk = sctx.pv; 2283 2284 /* copy data into U(k,:) */ 2285 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2286 jmin = bi[k]+1; jmax = bi[k+1]; 2287 if (jmin < jmax) { 2288 for (j=jmin; j<jmax; j++) { 2289 col = bj[j]; ba[j] = rtmp[col]; 2290 } 2291 /* add the k-th row into il and jl */ 2292 il[k] = jmin; 2293 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2294 } 2295 } 2296 } while (sctx.newshift); 2297 2298 PetscCall(PetscFree3(rtmp,il,jl)); 2299 PetscCall(ISRestoreIndices(ip,&rip)); 2300 PetscCall(ISRestoreIndices(iip,&riip)); 2301 2302 PetscCall(ISIdentity(ip,&perm_identity)); 2303 if (perm_identity) { 2304 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2305 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2306 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2307 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2308 } else { 2309 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2310 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2311 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2312 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2313 } 2314 2315 C->assembled = PETSC_TRUE; 2316 C->preallocated = PETSC_TRUE; 2317 2318 PetscCall(PetscLogFlops(C->rmap->n)); 2319 if (sctx.nshift) { 2320 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2321 PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 2322 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2323 PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount)); 2324 } 2325 } 2326 PetscFunctionReturn(0); 2327 } 2328 2329 /* 2330 icc() under revised new data structure. 2331 Factored arrays bj and ba are stored as 2332 U(0,:),...,U(i,:),U(n-1,:) 2333 2334 ui=fact->i is an array of size n+1, in which 2335 ui+ 2336 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2337 ui[n]: points to U(n-1,n-1)+1 2338 2339 udiag=fact->diag is an array of size n,in which 2340 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2341 2342 U(i,:) contains udiag[i] as its last entry, i.e., 2343 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2344 */ 2345 2346 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2347 { 2348 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2349 Mat_SeqSBAIJ *b; 2350 PetscBool perm_identity,missing; 2351 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2352 const PetscInt *rip,*riip; 2353 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2354 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2355 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2356 PetscReal fill =info->fill,levels=info->levels; 2357 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2358 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2359 PetscBT lnkbt; 2360 IS iperm; 2361 2362 PetscFunctionBegin; 2363 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2364 PetscCall(MatMissingDiagonal(A,&missing,&d)); 2365 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d); 2366 PetscCall(ISIdentity(perm,&perm_identity)); 2367 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2368 2369 PetscCall(PetscMalloc1(am+1,&ui)); 2370 PetscCall(PetscMalloc1(am+1,&udiag)); 2371 ui[0] = 0; 2372 2373 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2374 if (!levels && perm_identity) { 2375 for (i=0; i<am; i++) { 2376 ncols = ai[i+1] - a->diag[i]; 2377 ui[i+1] = ui[i] + ncols; 2378 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2379 } 2380 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2381 cols = uj; 2382 for (i=0; i<am; i++) { 2383 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2384 ncols = ai[i+1] - a->diag[i] -1; 2385 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2386 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2387 } 2388 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2389 PetscCall(ISGetIndices(iperm,&riip)); 2390 PetscCall(ISGetIndices(perm,&rip)); 2391 2392 /* initialization */ 2393 PetscCall(PetscMalloc1(am+1,&ajtmp)); 2394 2395 /* jl: linked list for storing indices of the pivot rows 2396 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2397 PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il)); 2398 for (i=0; i<am; i++) { 2399 jl[i] = am; il[i] = 0; 2400 } 2401 2402 /* create and initialize a linked list for storing column indices of the active row k */ 2403 nlnk = am + 1; 2404 PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt)); 2405 2406 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2407 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space)); 2408 current_space = free_space; 2409 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl)); 2410 current_space_lvl = free_space_lvl; 2411 2412 for (k=0; k<am; k++) { /* for each active row k */ 2413 /* initialize lnk by the column indices of row rip[k] of A */ 2414 nzk = 0; 2415 ncols = ai[rip[k]+1] - ai[rip[k]]; 2416 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2417 ncols_upper = 0; 2418 for (j=0; j<ncols; j++) { 2419 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2420 if (riip[i] >= k) { /* only take upper triangular entry */ 2421 ajtmp[ncols_upper] = i; 2422 ncols_upper++; 2423 } 2424 } 2425 PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt)); 2426 nzk += nlnk; 2427 2428 /* update lnk by computing fill-in for each pivot row to be merged in */ 2429 prow = jl[k]; /* 1st pivot row */ 2430 2431 while (prow < k) { 2432 nextprow = jl[prow]; 2433 2434 /* merge prow into k-th row */ 2435 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2436 jmax = ui[prow+1]; 2437 ncols = jmax-jmin; 2438 i = jmin - ui[prow]; 2439 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2440 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2441 j = *(uj - 1); 2442 PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j)); 2443 nzk += nlnk; 2444 2445 /* update il and jl for prow */ 2446 if (jmin < jmax) { 2447 il[prow] = jmin; 2448 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2449 } 2450 prow = nextprow; 2451 } 2452 2453 /* if free space is not available, make more free space */ 2454 if (current_space->local_remaining<nzk) { 2455 i = am - k + 1; /* num of unfactored rows */ 2456 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2457 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 2458 PetscCall(PetscFreeSpaceGet(i,¤t_space_lvl)); 2459 reallocs++; 2460 } 2461 2462 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2463 PetscCheck(nzk != 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k); 2464 PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 2465 2466 /* add the k-th row into il and jl */ 2467 if (nzk > 1) { 2468 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2469 jl[k] = jl[i]; jl[i] = k; 2470 il[k] = ui[k] + 1; 2471 } 2472 uj_ptr[k] = current_space->array; 2473 uj_lvl_ptr[k] = current_space_lvl->array; 2474 2475 current_space->array += nzk; 2476 current_space->local_used += nzk; 2477 current_space->local_remaining -= nzk; 2478 2479 current_space_lvl->array += nzk; 2480 current_space_lvl->local_used += nzk; 2481 current_space_lvl->local_remaining -= nzk; 2482 2483 ui[k+1] = ui[k] + nzk; 2484 } 2485 2486 PetscCall(ISRestoreIndices(perm,&rip)); 2487 PetscCall(ISRestoreIndices(iperm,&riip)); 2488 PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il)); 2489 PetscCall(PetscFree(ajtmp)); 2490 2491 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2492 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2493 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor */ 2494 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 2495 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2496 2497 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2498 2499 /* put together the new matrix in MATSEQSBAIJ format */ 2500 b = (Mat_SeqSBAIJ*)(fact)->data; 2501 b->singlemalloc = PETSC_FALSE; 2502 2503 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 2504 2505 b->j = uj; 2506 b->i = ui; 2507 b->diag = udiag; 2508 b->free_diag = PETSC_TRUE; 2509 b->ilen = NULL; 2510 b->imax = NULL; 2511 b->row = perm; 2512 b->col = perm; 2513 PetscCall(PetscObjectReference((PetscObject)perm)); 2514 PetscCall(PetscObjectReference((PetscObject)perm)); 2515 b->icol = iperm; 2516 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2517 2518 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 2519 PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)))); 2520 2521 b->maxnz = b->nz = ui[am]; 2522 b->free_a = PETSC_TRUE; 2523 b->free_ij = PETSC_TRUE; 2524 2525 fact->info.factor_mallocs = reallocs; 2526 fact->info.fill_ratio_given = fill; 2527 if (ai[am] != 0) { 2528 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2529 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2530 } else { 2531 fact->info.fill_ratio_needed = 0.0; 2532 } 2533 #if defined(PETSC_USE_INFO) 2534 if (ai[am] != 0) { 2535 PetscReal af = fact->info.fill_ratio_needed; 2536 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 2537 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 2538 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 2539 } else { 2540 PetscCall(PetscInfo(A,"Empty matrix\n")); 2541 } 2542 #endif 2543 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2548 { 2549 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2550 Mat_SeqSBAIJ *b; 2551 PetscBool perm_identity,missing; 2552 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2553 const PetscInt *rip,*riip; 2554 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2555 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2556 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2557 PetscReal fill =info->fill,levels=info->levels; 2558 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2559 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2560 PetscBT lnkbt; 2561 IS iperm; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2565 PetscCall(MatMissingDiagonal(A,&missing,&d)); 2566 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d); 2567 PetscCall(ISIdentity(perm,&perm_identity)); 2568 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2569 2570 PetscCall(PetscMalloc1(am+1,&ui)); 2571 PetscCall(PetscMalloc1(am+1,&udiag)); 2572 ui[0] = 0; 2573 2574 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2575 if (!levels && perm_identity) { 2576 2577 for (i=0; i<am; i++) { 2578 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2579 udiag[i] = ui[i]; 2580 } 2581 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2582 cols = uj; 2583 for (i=0; i<am; i++) { 2584 aj = a->j + a->diag[i]; 2585 ncols = ui[i+1] - ui[i]; 2586 for (j=0; j<ncols; j++) *cols++ = *aj++; 2587 } 2588 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2589 PetscCall(ISGetIndices(iperm,&riip)); 2590 PetscCall(ISGetIndices(perm,&rip)); 2591 2592 /* initialization */ 2593 PetscCall(PetscMalloc1(am+1,&ajtmp)); 2594 2595 /* jl: linked list for storing indices of the pivot rows 2596 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2597 PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il)); 2598 for (i=0; i<am; i++) { 2599 jl[i] = am; il[i] = 0; 2600 } 2601 2602 /* create and initialize a linked list for storing column indices of the active row k */ 2603 nlnk = am + 1; 2604 PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt)); 2605 2606 /* initial FreeSpace size is fill*(ai[am]+1) */ 2607 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space)); 2608 current_space = free_space; 2609 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl)); 2610 current_space_lvl = free_space_lvl; 2611 2612 for (k=0; k<am; k++) { /* for each active row k */ 2613 /* initialize lnk by the column indices of row rip[k] of A */ 2614 nzk = 0; 2615 ncols = ai[rip[k]+1] - ai[rip[k]]; 2616 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2617 ncols_upper = 0; 2618 for (j=0; j<ncols; j++) { 2619 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2620 if (riip[i] >= k) { /* only take upper triangular entry */ 2621 ajtmp[ncols_upper] = i; 2622 ncols_upper++; 2623 } 2624 } 2625 PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt)); 2626 nzk += nlnk; 2627 2628 /* update lnk by computing fill-in for each pivot row to be merged in */ 2629 prow = jl[k]; /* 1st pivot row */ 2630 2631 while (prow < k) { 2632 nextprow = jl[prow]; 2633 2634 /* merge prow into k-th row */ 2635 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2636 jmax = ui[prow+1]; 2637 ncols = jmax-jmin; 2638 i = jmin - ui[prow]; 2639 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2640 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2641 j = *(uj - 1); 2642 PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j)); 2643 nzk += nlnk; 2644 2645 /* update il and jl for prow */ 2646 if (jmin < jmax) { 2647 il[prow] = jmin; 2648 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2649 } 2650 prow = nextprow; 2651 } 2652 2653 /* if free space is not available, make more free space */ 2654 if (current_space->local_remaining<nzk) { 2655 i = am - k + 1; /* num of unfactored rows */ 2656 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2657 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 2658 PetscCall(PetscFreeSpaceGet(i,¤t_space_lvl)); 2659 reallocs++; 2660 } 2661 2662 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2663 PetscCheck(nzk,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k); 2664 PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt)); 2665 2666 /* add the k-th row into il and jl */ 2667 if (nzk > 1) { 2668 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2669 jl[k] = jl[i]; jl[i] = k; 2670 il[k] = ui[k] + 1; 2671 } 2672 uj_ptr[k] = current_space->array; 2673 uj_lvl_ptr[k] = current_space_lvl->array; 2674 2675 current_space->array += nzk; 2676 current_space->local_used += nzk; 2677 current_space->local_remaining -= nzk; 2678 2679 current_space_lvl->array += nzk; 2680 current_space_lvl->local_used += nzk; 2681 current_space_lvl->local_remaining -= nzk; 2682 2683 ui[k+1] = ui[k] + nzk; 2684 } 2685 2686 #if defined(PETSC_USE_INFO) 2687 if (ai[am] != 0) { 2688 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2689 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 2690 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 2691 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 2692 } else { 2693 PetscCall(PetscInfo(A,"Empty matrix\n")); 2694 } 2695 #endif 2696 2697 PetscCall(ISRestoreIndices(perm,&rip)); 2698 PetscCall(ISRestoreIndices(iperm,&riip)); 2699 PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il)); 2700 PetscCall(PetscFree(ajtmp)); 2701 2702 /* destroy list of free space and other temporary array(s) */ 2703 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2704 PetscCall(PetscFreeSpaceContiguous(&free_space,uj)); 2705 PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt)); 2706 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2707 2708 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2709 2710 /* put together the new matrix in MATSEQSBAIJ format */ 2711 2712 b = (Mat_SeqSBAIJ*)fact->data; 2713 b->singlemalloc = PETSC_FALSE; 2714 2715 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 2716 2717 b->j = uj; 2718 b->i = ui; 2719 b->diag = udiag; 2720 b->free_diag = PETSC_TRUE; 2721 b->ilen = NULL; 2722 b->imax = NULL; 2723 b->row = perm; 2724 b->col = perm; 2725 2726 PetscCall(PetscObjectReference((PetscObject)perm)); 2727 PetscCall(PetscObjectReference((PetscObject)perm)); 2728 2729 b->icol = iperm; 2730 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2731 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 2732 PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)))); 2733 b->maxnz = b->nz = ui[am]; 2734 b->free_a = PETSC_TRUE; 2735 b->free_ij = PETSC_TRUE; 2736 2737 fact->info.factor_mallocs = reallocs; 2738 fact->info.fill_ratio_given = fill; 2739 if (ai[am] != 0) { 2740 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2741 } else { 2742 fact->info.fill_ratio_needed = 0.0; 2743 } 2744 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2745 PetscFunctionReturn(0); 2746 } 2747 2748 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2749 { 2750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2751 Mat_SeqSBAIJ *b; 2752 PetscBool perm_identity,missing; 2753 PetscReal fill = info->fill; 2754 const PetscInt *rip,*riip; 2755 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2756 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2757 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2758 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2759 PetscBT lnkbt; 2760 IS iperm; 2761 2762 PetscFunctionBegin; 2763 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2764 PetscCall(MatMissingDiagonal(A,&missing,&i)); 2765 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 2766 2767 /* check whether perm is the identity mapping */ 2768 PetscCall(ISIdentity(perm,&perm_identity)); 2769 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2770 PetscCall(ISGetIndices(iperm,&riip)); 2771 PetscCall(ISGetIndices(perm,&rip)); 2772 2773 /* initialization */ 2774 PetscCall(PetscMalloc1(am+1,&ui)); 2775 PetscCall(PetscMalloc1(am+1,&udiag)); 2776 ui[0] = 0; 2777 2778 /* jl: linked list for storing indices of the pivot rows 2779 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2780 PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols)); 2781 for (i=0; i<am; i++) { 2782 jl[i] = am; il[i] = 0; 2783 } 2784 2785 /* create and initialize a linked list for storing column indices of the active row k */ 2786 nlnk = am + 1; 2787 PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt)); 2788 2789 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2790 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space)); 2791 current_space = free_space; 2792 2793 for (k=0; k<am; k++) { /* for each active row k */ 2794 /* initialize lnk by the column indices of row rip[k] of A */ 2795 nzk = 0; 2796 ncols = ai[rip[k]+1] - ai[rip[k]]; 2797 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2798 ncols_upper = 0; 2799 for (j=0; j<ncols; j++) { 2800 i = riip[*(aj + ai[rip[k]] + j)]; 2801 if (i >= k) { /* only take upper triangular entry */ 2802 cols[ncols_upper] = i; 2803 ncols_upper++; 2804 } 2805 } 2806 PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt)); 2807 nzk += nlnk; 2808 2809 /* update lnk by computing fill-in for each pivot row to be merged in */ 2810 prow = jl[k]; /* 1st pivot row */ 2811 2812 while (prow < k) { 2813 nextprow = jl[prow]; 2814 /* merge prow into k-th row */ 2815 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2816 jmax = ui[prow+1]; 2817 ncols = jmax-jmin; 2818 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2819 PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt)); 2820 nzk += nlnk; 2821 2822 /* update il and jl for prow */ 2823 if (jmin < jmax) { 2824 il[prow] = jmin; 2825 j = *uj_ptr; 2826 jl[prow] = jl[j]; 2827 jl[j] = prow; 2828 } 2829 prow = nextprow; 2830 } 2831 2832 /* if free space is not available, make more free space */ 2833 if (current_space->local_remaining<nzk) { 2834 i = am - k + 1; /* num of unfactored rows */ 2835 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2836 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 2837 reallocs++; 2838 } 2839 2840 /* copy data into free space, then initialize lnk */ 2841 PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt)); 2842 2843 /* add the k-th row into il and jl */ 2844 if (nzk > 1) { 2845 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2846 jl[k] = jl[i]; jl[i] = k; 2847 il[k] = ui[k] + 1; 2848 } 2849 ui_ptr[k] = current_space->array; 2850 2851 current_space->array += nzk; 2852 current_space->local_used += nzk; 2853 current_space->local_remaining -= nzk; 2854 2855 ui[k+1] = ui[k] + nzk; 2856 } 2857 2858 PetscCall(ISRestoreIndices(perm,&rip)); 2859 PetscCall(ISRestoreIndices(iperm,&riip)); 2860 PetscCall(PetscFree4(ui_ptr,jl,il,cols)); 2861 2862 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2863 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 2864 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor */ 2865 PetscCall(PetscLLDestroy(lnk,lnkbt)); 2866 2867 /* put together the new matrix in MATSEQSBAIJ format */ 2868 2869 b = (Mat_SeqSBAIJ*)fact->data; 2870 b->singlemalloc = PETSC_FALSE; 2871 b->free_a = PETSC_TRUE; 2872 b->free_ij = PETSC_TRUE; 2873 2874 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 2875 2876 b->j = uj; 2877 b->i = ui; 2878 b->diag = udiag; 2879 b->free_diag = PETSC_TRUE; 2880 b->ilen = NULL; 2881 b->imax = NULL; 2882 b->row = perm; 2883 b->col = perm; 2884 2885 PetscCall(PetscObjectReference((PetscObject)perm)); 2886 PetscCall(PetscObjectReference((PetscObject)perm)); 2887 2888 b->icol = iperm; 2889 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2890 2891 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 2892 PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)))); 2893 2894 b->maxnz = b->nz = ui[am]; 2895 2896 fact->info.factor_mallocs = reallocs; 2897 fact->info.fill_ratio_given = fill; 2898 if (ai[am] != 0) { 2899 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2900 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2901 } else { 2902 fact->info.fill_ratio_needed = 0.0; 2903 } 2904 #if defined(PETSC_USE_INFO) 2905 if (ai[am] != 0) { 2906 PetscReal af = fact->info.fill_ratio_needed; 2907 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 2908 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 2909 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 2910 } else { 2911 PetscCall(PetscInfo(A,"Empty matrix\n")); 2912 } 2913 #endif 2914 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2915 PetscFunctionReturn(0); 2916 } 2917 2918 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2919 { 2920 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2921 Mat_SeqSBAIJ *b; 2922 PetscBool perm_identity,missing; 2923 PetscReal fill = info->fill; 2924 const PetscInt *rip,*riip; 2925 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2926 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2927 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2928 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2929 PetscBT lnkbt; 2930 IS iperm; 2931 2932 PetscFunctionBegin; 2933 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n); 2934 PetscCall(MatMissingDiagonal(A,&missing,&i)); 2935 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 2936 2937 /* check whether perm is the identity mapping */ 2938 PetscCall(ISIdentity(perm,&perm_identity)); 2939 PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm)); 2940 PetscCall(ISGetIndices(iperm,&riip)); 2941 PetscCall(ISGetIndices(perm,&rip)); 2942 2943 /* initialization */ 2944 PetscCall(PetscMalloc1(am+1,&ui)); 2945 ui[0] = 0; 2946 2947 /* jl: linked list for storing indices of the pivot rows 2948 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2949 PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols)); 2950 for (i=0; i<am; i++) { 2951 jl[i] = am; il[i] = 0; 2952 } 2953 2954 /* create and initialize a linked list for storing column indices of the active row k */ 2955 nlnk = am + 1; 2956 PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt)); 2957 2958 /* initial FreeSpace size is fill*(ai[am]+1) */ 2959 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space)); 2960 current_space = free_space; 2961 2962 for (k=0; k<am; k++) { /* for each active row k */ 2963 /* initialize lnk by the column indices of row rip[k] of A */ 2964 nzk = 0; 2965 ncols = ai[rip[k]+1] - ai[rip[k]]; 2966 PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k); 2967 ncols_upper = 0; 2968 for (j=0; j<ncols; j++) { 2969 i = riip[*(aj + ai[rip[k]] + j)]; 2970 if (i >= k) { /* only take upper triangular entry */ 2971 cols[ncols_upper] = i; 2972 ncols_upper++; 2973 } 2974 } 2975 PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt)); 2976 nzk += nlnk; 2977 2978 /* update lnk by computing fill-in for each pivot row to be merged in */ 2979 prow = jl[k]; /* 1st pivot row */ 2980 2981 while (prow < k) { 2982 nextprow = jl[prow]; 2983 /* merge prow into k-th row */ 2984 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2985 jmax = ui[prow+1]; 2986 ncols = jmax-jmin; 2987 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2988 PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt)); 2989 nzk += nlnk; 2990 2991 /* update il and jl for prow */ 2992 if (jmin < jmax) { 2993 il[prow] = jmin; 2994 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2995 } 2996 prow = nextprow; 2997 } 2998 2999 /* if free space is not available, make more free space */ 3000 if (current_space->local_remaining<nzk) { 3001 i = am - k + 1; /* num of unfactored rows */ 3002 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3003 PetscCall(PetscFreeSpaceGet(i,¤t_space)); 3004 reallocs++; 3005 } 3006 3007 /* copy data into free space, then initialize lnk */ 3008 PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt)); 3009 3010 /* add the k-th row into il and jl */ 3011 if (nzk-1 > 0) { 3012 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3013 jl[k] = jl[i]; jl[i] = k; 3014 il[k] = ui[k] + 1; 3015 } 3016 ui_ptr[k] = current_space->array; 3017 3018 current_space->array += nzk; 3019 current_space->local_used += nzk; 3020 current_space->local_remaining -= nzk; 3021 3022 ui[k+1] = ui[k] + nzk; 3023 } 3024 3025 #if defined(PETSC_USE_INFO) 3026 if (ai[am] != 0) { 3027 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3028 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af)); 3029 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 3030 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af)); 3031 } else { 3032 PetscCall(PetscInfo(A,"Empty matrix\n")); 3033 } 3034 #endif 3035 3036 PetscCall(ISRestoreIndices(perm,&rip)); 3037 PetscCall(ISRestoreIndices(iperm,&riip)); 3038 PetscCall(PetscFree4(ui_ptr,jl,il,cols)); 3039 3040 /* destroy list of free space and other temporary array(s) */ 3041 PetscCall(PetscMalloc1(ui[am]+1,&uj)); 3042 PetscCall(PetscFreeSpaceContiguous(&free_space,uj)); 3043 PetscCall(PetscLLDestroy(lnk,lnkbt)); 3044 3045 /* put together the new matrix in MATSEQSBAIJ format */ 3046 3047 b = (Mat_SeqSBAIJ*)fact->data; 3048 b->singlemalloc = PETSC_FALSE; 3049 b->free_a = PETSC_TRUE; 3050 b->free_ij = PETSC_TRUE; 3051 3052 PetscCall(PetscMalloc1(ui[am]+1,&b->a)); 3053 3054 b->j = uj; 3055 b->i = ui; 3056 b->diag = NULL; 3057 b->ilen = NULL; 3058 b->imax = NULL; 3059 b->row = perm; 3060 b->col = perm; 3061 3062 PetscCall(PetscObjectReference((PetscObject)perm)); 3063 PetscCall(PetscObjectReference((PetscObject)perm)); 3064 3065 b->icol = iperm; 3066 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3067 3068 PetscCall(PetscMalloc1(am+1,&b->solve_work)); 3069 PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)))); 3070 b->maxnz = b->nz = ui[am]; 3071 3072 fact->info.factor_mallocs = reallocs; 3073 fact->info.fill_ratio_given = fill; 3074 if (ai[am] != 0) { 3075 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3076 } else { 3077 fact->info.fill_ratio_needed = 0.0; 3078 } 3079 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3080 PetscFunctionReturn(0); 3081 } 3082 3083 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3084 { 3085 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3086 PetscInt n = A->rmap->n; 3087 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3088 PetscScalar *x,sum; 3089 const PetscScalar *b; 3090 const MatScalar *aa = a->a,*v; 3091 PetscInt i,nz; 3092 3093 PetscFunctionBegin; 3094 if (!n) PetscFunctionReturn(0); 3095 3096 PetscCall(VecGetArrayRead(bb,&b)); 3097 PetscCall(VecGetArrayWrite(xx,&x)); 3098 3099 /* forward solve the lower triangular */ 3100 x[0] = b[0]; 3101 v = aa; 3102 vi = aj; 3103 for (i=1; i<n; i++) { 3104 nz = ai[i+1] - ai[i]; 3105 sum = b[i]; 3106 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3107 v += nz; 3108 vi += nz; 3109 x[i] = sum; 3110 } 3111 3112 /* backward solve the upper triangular */ 3113 for (i=n-1; i>=0; i--) { 3114 v = aa + adiag[i+1] + 1; 3115 vi = aj + adiag[i+1] + 1; 3116 nz = adiag[i] - adiag[i+1]-1; 3117 sum = x[i]; 3118 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3119 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3120 } 3121 3122 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 3123 PetscCall(VecRestoreArrayRead(bb,&b)); 3124 PetscCall(VecRestoreArrayWrite(xx,&x)); 3125 PetscFunctionReturn(0); 3126 } 3127 3128 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3129 { 3130 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3131 IS iscol = a->col,isrow = a->row; 3132 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3133 const PetscInt *rout,*cout,*r,*c; 3134 PetscScalar *x,*tmp,sum; 3135 const PetscScalar *b; 3136 const MatScalar *aa = a->a,*v; 3137 3138 PetscFunctionBegin; 3139 if (!n) PetscFunctionReturn(0); 3140 3141 PetscCall(VecGetArrayRead(bb,&b)); 3142 PetscCall(VecGetArrayWrite(xx,&x)); 3143 tmp = a->solve_work; 3144 3145 PetscCall(ISGetIndices(isrow,&rout)); r = rout; 3146 PetscCall(ISGetIndices(iscol,&cout)); c = cout; 3147 3148 /* forward solve the lower triangular */ 3149 tmp[0] = b[r[0]]; 3150 v = aa; 3151 vi = aj; 3152 for (i=1; i<n; i++) { 3153 nz = ai[i+1] - ai[i]; 3154 sum = b[r[i]]; 3155 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3156 tmp[i] = sum; 3157 v += nz; vi += nz; 3158 } 3159 3160 /* backward solve the upper triangular */ 3161 for (i=n-1; i>=0; i--) { 3162 v = aa + adiag[i+1]+1; 3163 vi = aj + adiag[i+1]+1; 3164 nz = adiag[i]-adiag[i+1]-1; 3165 sum = tmp[i]; 3166 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3167 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3168 } 3169 3170 PetscCall(ISRestoreIndices(isrow,&rout)); 3171 PetscCall(ISRestoreIndices(iscol,&cout)); 3172 PetscCall(VecRestoreArrayRead(bb,&b)); 3173 PetscCall(VecRestoreArrayWrite(xx,&x)); 3174 PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n)); 3175 PetscFunctionReturn(0); 3176 } 3177 3178 /* 3179 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3180 */ 3181 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3182 { 3183 Mat B = *fact; 3184 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3185 IS isicol; 3186 const PetscInt *r,*ic; 3187 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3188 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3189 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3190 PetscInt nlnk,*lnk; 3191 PetscBT lnkbt; 3192 PetscBool row_identity,icol_identity; 3193 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3194 const PetscInt *ics; 3195 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3196 PetscReal dt =info->dt,shift=info->shiftamount; 3197 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3198 PetscBool missing; 3199 3200 PetscFunctionBegin; 3201 if (dt == PETSC_DEFAULT) dt = 0.005; 3202 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3203 3204 /* ------- symbolic factorization, can be reused ---------*/ 3205 PetscCall(MatMissingDiagonal(A,&missing,&i)); 3206 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 3207 adiag=a->diag; 3208 3209 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 3210 3211 /* bdiag is location of diagonal in factor */ 3212 PetscCall(PetscMalloc1(n+1,&bdiag)); /* becomes b->diag */ 3213 PetscCall(PetscMalloc1(n+1,&bdiag_rev)); /* temporary */ 3214 3215 /* allocate row pointers bi */ 3216 PetscCall(PetscMalloc1(2*n+2,&bi)); 3217 3218 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3219 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3220 nnz_max = ai[n]+2*n*dtcount+2; 3221 3222 PetscCall(PetscMalloc1(nnz_max+1,&bj)); 3223 PetscCall(PetscMalloc1(nnz_max+1,&ba)); 3224 3225 /* put together the new matrix */ 3226 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL)); 3227 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 3228 b = (Mat_SeqAIJ*)B->data; 3229 3230 b->free_a = PETSC_TRUE; 3231 b->free_ij = PETSC_TRUE; 3232 b->singlemalloc = PETSC_FALSE; 3233 3234 b->a = ba; 3235 b->j = bj; 3236 b->i = bi; 3237 b->diag = bdiag; 3238 b->ilen = NULL; 3239 b->imax = NULL; 3240 b->row = isrow; 3241 b->col = iscol; 3242 PetscCall(PetscObjectReference((PetscObject)isrow)); 3243 PetscCall(PetscObjectReference((PetscObject)iscol)); 3244 b->icol = isicol; 3245 3246 PetscCall(PetscMalloc1(n+1,&b->solve_work)); 3247 PetscCall(PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)))); 3248 b->maxnz = nnz_max; 3249 3250 B->factortype = MAT_FACTOR_ILUDT; 3251 B->info.factor_mallocs = 0; 3252 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3253 /* ------- end of symbolic factorization ---------*/ 3254 3255 PetscCall(ISGetIndices(isrow,&r)); 3256 PetscCall(ISGetIndices(isicol,&ic)); 3257 ics = ic; 3258 3259 /* linked list for storing column indices of the active row */ 3260 nlnk = n + 1; 3261 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 3262 3263 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3264 PetscCall(PetscMalloc2(n,&im,n,&jtmp)); 3265 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3266 PetscCall(PetscMalloc2(n,&rtmp,n,&vtmp)); 3267 PetscCall(PetscArrayzero(rtmp,n)); 3268 3269 bi[0] = 0; 3270 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3271 bdiag_rev[n] = bdiag[0]; 3272 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3273 for (i=0; i<n; i++) { 3274 /* copy initial fill into linked list */ 3275 nzi = ai[r[i]+1] - ai[r[i]]; 3276 PetscCheck(nzi,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i); 3277 nzi_al = adiag[r[i]] - ai[r[i]]; 3278 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3279 ajtmp = aj + ai[r[i]]; 3280 PetscCall(PetscLLAddPerm(nzi,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 3281 3282 /* load in initial (unfactored row) */ 3283 aatmp = a->a + ai[r[i]]; 3284 for (j=0; j<nzi; j++) { 3285 rtmp[ics[*ajtmp++]] = *aatmp++; 3286 } 3287 3288 /* add pivot rows into linked list */ 3289 row = lnk[n]; 3290 while (row < i) { 3291 nzi_bl = bi[row+1] - bi[row] + 1; 3292 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3293 PetscCall(PetscLLAddSortedLU(bjtmp,row,&nlnk,lnk,lnkbt,i,nzi_bl,im)); 3294 nzi += nlnk; 3295 row = lnk[row]; 3296 } 3297 3298 /* copy data from lnk into jtmp, then initialize lnk */ 3299 PetscCall(PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt)); 3300 3301 /* numerical factorization */ 3302 bjtmp = jtmp; 3303 row = *bjtmp++; /* 1st pivot row */ 3304 while (row < i) { 3305 pc = rtmp + row; 3306 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3307 multiplier = (*pc) * (*pv); 3308 *pc = multiplier; 3309 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3310 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3311 pv = ba + bdiag[row+1] + 1; 3312 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3313 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3314 PetscCall(PetscLogFlops(1+2.0*nz)); 3315 } 3316 row = *bjtmp++; 3317 } 3318 3319 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3320 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3321 nzi_bl = 0; j = 0; 3322 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3323 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3324 nzi_bl++; j++; 3325 } 3326 nzi_bu = nzi - nzi_bl -1; 3327 while (j < nzi) { 3328 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3329 j++; 3330 } 3331 3332 bjtmp = bj + bi[i]; 3333 batmp = ba + bi[i]; 3334 /* apply level dropping rule to L part */ 3335 ncut = nzi_al + dtcount; 3336 if (ncut < nzi_bl) { 3337 PetscCall(PetscSortSplit(ncut,nzi_bl,vtmp,jtmp)); 3338 PetscCall(PetscSortIntWithScalarArray(ncut,jtmp,vtmp)); 3339 } else { 3340 ncut = nzi_bl; 3341 } 3342 for (j=0; j<ncut; j++) { 3343 bjtmp[j] = jtmp[j]; 3344 batmp[j] = vtmp[j]; 3345 } 3346 bi[i+1] = bi[i] + ncut; 3347 nzi = ncut + 1; 3348 3349 /* apply level dropping rule to U part */ 3350 ncut = nzi_au + dtcount; 3351 if (ncut < nzi_bu) { 3352 PetscCall(PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1)); 3353 PetscCall(PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1)); 3354 } else { 3355 ncut = nzi_bu; 3356 } 3357 nzi += ncut; 3358 3359 /* mark bdiagonal */ 3360 bdiag[i+1] = bdiag[i] - (ncut + 1); 3361 bdiag_rev[n-i-1] = bdiag[i+1]; 3362 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3363 bjtmp = bj + bdiag[i]; 3364 batmp = ba + bdiag[i]; 3365 *bjtmp = i; 3366 *batmp = diag_tmp; /* rtmp[i]; */ 3367 if (*batmp == 0.0) { 3368 *batmp = dt+shift; 3369 } 3370 *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */ 3371 3372 bjtmp = bj + bdiag[i+1]+1; 3373 batmp = ba + bdiag[i+1]+1; 3374 for (k=0; k<ncut; k++) { 3375 bjtmp[k] = jtmp[nzi_bl+1+k]; 3376 batmp[k] = vtmp[nzi_bl+1+k]; 3377 } 3378 3379 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3380 } /* for (i=0; i<n; i++) */ 3381 PetscCheck(bi[n] < bdiag[n],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT,bi[n],bdiag[n]); 3382 3383 PetscCall(ISRestoreIndices(isrow,&r)); 3384 PetscCall(ISRestoreIndices(isicol,&ic)); 3385 3386 PetscCall(PetscLLDestroy(lnk,lnkbt)); 3387 PetscCall(PetscFree2(im,jtmp)); 3388 PetscCall(PetscFree2(rtmp,vtmp)); 3389 PetscCall(PetscFree(bdiag_rev)); 3390 3391 PetscCall(PetscLogFlops(B->cmap->n)); 3392 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3393 3394 PetscCall(ISIdentity(isrow,&row_identity)); 3395 PetscCall(ISIdentity(isicol,&icol_identity)); 3396 if (row_identity && icol_identity) { 3397 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3398 } else { 3399 B->ops->solve = MatSolve_SeqAIJ; 3400 } 3401 3402 B->ops->solveadd = NULL; 3403 B->ops->solvetranspose = NULL; 3404 B->ops->solvetransposeadd = NULL; 3405 B->ops->matsolve = NULL; 3406 B->assembled = PETSC_TRUE; 3407 B->preallocated = PETSC_TRUE; 3408 PetscFunctionReturn(0); 3409 } 3410 3411 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3412 /* 3413 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3414 */ 3415 3416 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3417 { 3418 PetscFunctionBegin; 3419 PetscCall(MatILUDTFactor_SeqAIJ(A,row,col,info,&fact)); 3420 PetscFunctionReturn(0); 3421 } 3422 3423 /* 3424 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3425 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3426 */ 3427 /* 3428 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3429 */ 3430 3431 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3432 { 3433 Mat C =fact; 3434 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3435 IS isrow = b->row,isicol = b->icol; 3436 const PetscInt *r,*ic,*ics; 3437 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3438 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3439 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3440 PetscReal dt=info->dt,shift=info->shiftamount; 3441 PetscBool row_identity, col_identity; 3442 3443 PetscFunctionBegin; 3444 PetscCall(ISGetIndices(isrow,&r)); 3445 PetscCall(ISGetIndices(isicol,&ic)); 3446 PetscCall(PetscMalloc1(n+1,&rtmp)); 3447 ics = ic; 3448 3449 for (i=0; i<n; i++) { 3450 /* initialize rtmp array */ 3451 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3452 bjtmp = bj + bi[i]; 3453 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3454 rtmp[i] = 0.0; 3455 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3456 bjtmp = bj + bdiag[i+1] + 1; 3457 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3458 3459 /* load in initial unfactored row of A */ 3460 nz = ai[r[i]+1] - ai[r[i]]; 3461 ajtmp = aj + ai[r[i]]; 3462 v = aa + ai[r[i]]; 3463 for (j=0; j<nz; j++) { 3464 rtmp[ics[*ajtmp++]] = v[j]; 3465 } 3466 3467 /* numerical factorization */ 3468 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3469 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3470 k = 0; 3471 while (k < nzl) { 3472 row = *bjtmp++; 3473 pc = rtmp + row; 3474 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3475 multiplier = (*pc) * (*pv); 3476 *pc = multiplier; 3477 if (PetscAbsScalar(multiplier) > dt) { 3478 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3479 pv = b->a + bdiag[row+1] + 1; 3480 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3481 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3482 PetscCall(PetscLogFlops(1+2.0*nz)); 3483 } 3484 k++; 3485 } 3486 3487 /* finished row so stick it into b->a */ 3488 /* L-part */ 3489 pv = b->a + bi[i]; 3490 pj = bj + bi[i]; 3491 nzl = bi[i+1] - bi[i]; 3492 for (j=0; j<nzl; j++) { 3493 pv[j] = rtmp[pj[j]]; 3494 } 3495 3496 /* diagonal: invert diagonal entries for simpler triangular solves */ 3497 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3498 b->a[bdiag[i]] = 1.0/rtmp[i]; 3499 3500 /* U-part */ 3501 pv = b->a + bdiag[i+1] + 1; 3502 pj = bj + bdiag[i+1] + 1; 3503 nzu = bdiag[i] - bdiag[i+1] - 1; 3504 for (j=0; j<nzu; j++) { 3505 pv[j] = rtmp[pj[j]]; 3506 } 3507 } 3508 3509 PetscCall(PetscFree(rtmp)); 3510 PetscCall(ISRestoreIndices(isicol,&ic)); 3511 PetscCall(ISRestoreIndices(isrow,&r)); 3512 3513 PetscCall(ISIdentity(isrow,&row_identity)); 3514 PetscCall(ISIdentity(isicol,&col_identity)); 3515 if (row_identity && col_identity) { 3516 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3517 } else { 3518 C->ops->solve = MatSolve_SeqAIJ; 3519 } 3520 C->ops->solveadd = NULL; 3521 C->ops->solvetranspose = NULL; 3522 C->ops->solvetransposeadd = NULL; 3523 C->ops->matsolve = NULL; 3524 C->assembled = PETSC_TRUE; 3525 C->preallocated = PETSC_TRUE; 3526 3527 PetscCall(PetscLogFlops(C->cmap->n)); 3528 PetscFunctionReturn(0); 3529 } 3530