1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best, past = 0, future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i = 0; i < n; i++) { 25 future = 0.0; 26 for (j = ai[i]; j < ai[i + 1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past / future > best) { 32 best = past / future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n, &done)); 38 PetscCall(PetscArrayzero(done, n)); 39 PetscCall(PetscMalloc1(n, &order)); 40 order[0] = current; 41 for (i = 0; i < n - 1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j = ai[current]; j < ai[current + 1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k = ai[jj]; k < ai[jj + 1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past / future > best) { 57 best = past / future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k = 0; k < n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j = ai[k]; j < ai[k + 1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past / future > best) { 74 best = past / future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 80 current = newcurrent; 81 order[i + 1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 91 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 92 { 93 PetscFunctionBegin; 94 *type = MATSOLVERPETSC; 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 99 { 100 PetscInt n = A->rmap->n; 101 102 PetscFunctionBegin; 103 #if defined(PETSC_USE_COMPLEX) 104 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported"); 105 #endif 106 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 107 PetscCall(MatSetSizes(*B, n, n, n, n)); 108 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 109 PetscCall(MatSetType(*B, MATSEQAIJ)); 110 111 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 112 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 113 114 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 115 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 116 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 117 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 118 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 119 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 120 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 121 122 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 123 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 124 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 125 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 126 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 127 (*B)->factortype = ftype; 128 129 PetscCall(PetscFree((*B)->solvertype)); 130 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 131 (*B)->canuseordering = PETSC_TRUE; 132 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 133 PetscFunctionReturn(PETSC_SUCCESS); 134 } 135 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 139 IS isicol; 140 const PetscInt *r, *ic; 141 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 142 PetscInt *bi, *bj, *ajtmp; 143 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 144 PetscReal f; 145 PetscInt nlnk, *lnk, k, **bi_ptr; 146 PetscFreeSpaceList free_space = NULL, current_space = NULL; 147 PetscBT lnkbt; 148 PetscBool missing; 149 150 PetscFunctionBegin; 151 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 152 PetscCall(MatMissingDiagonal(A, &missing, &i)); 153 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 154 155 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 156 PetscCall(ISGetIndices(isrow, &r)); 157 PetscCall(ISGetIndices(isicol, &ic)); 158 159 /* get new row pointers */ 160 PetscCall(PetscMalloc1(n + 1, &bi)); 161 bi[0] = 0; 162 163 /* bdiag is location of diagonal in factor */ 164 PetscCall(PetscMalloc1(n + 1, &bdiag)); 165 bdiag[0] = 0; 166 167 /* linked list for storing column indices of the active row */ 168 nlnk = n + 1; 169 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 170 171 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 172 173 /* initial FreeSpace size is f*(ai[n]+1) */ 174 f = info->fill; 175 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 176 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 177 current_space = free_space; 178 179 for (i = 0; i < n; i++) { 180 /* copy previous fill into linked list */ 181 nzi = 0; 182 nnz = ai[r[i] + 1] - ai[r[i]]; 183 ajtmp = aj + ai[r[i]]; 184 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 185 nzi += nlnk; 186 187 /* add pivot rows into linked list */ 188 row = lnk[n]; 189 while (row < i) { 190 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 191 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 192 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 193 nzi += nlnk; 194 row = lnk[row]; 195 } 196 bi[i + 1] = bi[i] + nzi; 197 im[i] = nzi; 198 199 /* mark bdiag */ 200 nzbd = 0; 201 nnz = nzi; 202 k = lnk[n]; 203 while (nnz-- && k < i) { 204 nzbd++; 205 k = lnk[k]; 206 } 207 bdiag[i] = bi[i] + nzbd; 208 209 /* if free space is not available, make more free space */ 210 if (current_space->local_remaining < nzi) { 211 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 212 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 213 reallocs++; 214 } 215 216 /* copy data into free space, then initialize lnk */ 217 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 218 219 bi_ptr[i] = current_space->array; 220 current_space->array += nzi; 221 current_space->local_used += nzi; 222 current_space->local_remaining -= nzi; 223 } 224 #if defined(PETSC_USE_INFO) 225 if (ai[n] != 0) { 226 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 227 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 228 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 229 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 230 PetscCall(PetscInfo(A, "for best performance.\n")); 231 } else { 232 PetscCall(PetscInfo(A, "Empty matrix\n")); 233 } 234 #endif 235 236 PetscCall(ISRestoreIndices(isrow, &r)); 237 PetscCall(ISRestoreIndices(isicol, &ic)); 238 239 /* destroy list of free space and other temporary array(s) */ 240 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 241 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 242 PetscCall(PetscLLDestroy(lnk, lnkbt)); 243 PetscCall(PetscFree2(bi_ptr, im)); 244 245 /* put together the new matrix */ 246 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 247 b = (Mat_SeqAIJ *)(B)->data; 248 249 b->free_a = PETSC_TRUE; 250 b->free_ij = PETSC_TRUE; 251 b->singlemalloc = PETSC_FALSE; 252 253 PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 254 b->j = bj; 255 b->i = bi; 256 b->diag = bdiag; 257 b->ilen = NULL; 258 b->imax = NULL; 259 b->row = isrow; 260 b->col = iscol; 261 PetscCall(PetscObjectReference((PetscObject)isrow)); 262 PetscCall(PetscObjectReference((PetscObject)iscol)); 263 b->icol = isicol; 264 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 265 266 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 267 b->maxnz = b->nz = bi[n]; 268 269 (B)->factortype = MAT_FACTOR_LU; 270 (B)->info.factor_mallocs = reallocs; 271 (B)->info.fill_ratio_given = f; 272 273 if (ai[n]) { 274 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 275 } else { 276 (B)->info.fill_ratio_needed = 0.0; 277 } 278 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 279 if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 280 PetscFunctionReturn(PETSC_SUCCESS); 281 } 282 283 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 284 { 285 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 286 IS isicol; 287 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 288 PetscInt i, n = A->rmap->n; 289 PetscInt *bi, *bj; 290 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 291 PetscReal f; 292 PetscInt nlnk, *lnk, k, **bi_ptr; 293 PetscFreeSpaceList free_space = NULL, current_space = NULL; 294 PetscBT lnkbt; 295 PetscBool missing; 296 297 PetscFunctionBegin; 298 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 299 PetscCall(MatMissingDiagonal(A, &missing, &i)); 300 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 301 302 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 303 PetscCall(ISGetIndices(isrow, &r)); 304 PetscCall(ISGetIndices(isicol, &ic)); 305 306 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 307 PetscCall(PetscMalloc1(n + 1, &bi)); 308 PetscCall(PetscMalloc1(n + 1, &bdiag)); 309 bi[0] = bdiag[0] = 0; 310 311 /* linked list for storing column indices of the active row */ 312 nlnk = n + 1; 313 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 314 315 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 316 317 /* initial FreeSpace size is f*(ai[n]+1) */ 318 f = info->fill; 319 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 320 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 321 current_space = free_space; 322 323 for (i = 0; i < n; i++) { 324 /* copy previous fill into linked list */ 325 nzi = 0; 326 nnz = ai[r[i] + 1] - ai[r[i]]; 327 ajtmp = aj + ai[r[i]]; 328 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 329 nzi += nlnk; 330 331 /* add pivot rows into linked list */ 332 row = lnk[n]; 333 while (row < i) { 334 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 335 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 336 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 337 nzi += nlnk; 338 row = lnk[row]; 339 } 340 bi[i + 1] = bi[i] + nzi; 341 im[i] = nzi; 342 343 /* mark bdiag */ 344 nzbd = 0; 345 nnz = nzi; 346 k = lnk[n]; 347 while (nnz-- && k < i) { 348 nzbd++; 349 k = lnk[k]; 350 } 351 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 352 353 /* if free space is not available, make more free space */ 354 if (current_space->local_remaining < nzi) { 355 /* estimated additional space needed */ 356 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 357 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 358 reallocs++; 359 } 360 361 /* copy data into free space, then initialize lnk */ 362 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 363 364 bi_ptr[i] = current_space->array; 365 current_space->array += nzi; 366 current_space->local_used += nzi; 367 current_space->local_remaining -= nzi; 368 } 369 370 PetscCall(ISRestoreIndices(isrow, &r)); 371 PetscCall(ISRestoreIndices(isicol, &ic)); 372 373 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 374 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 375 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 376 PetscCall(PetscLLDestroy(lnk, lnkbt)); 377 PetscCall(PetscFree2(bi_ptr, im)); 378 379 /* put together the new matrix */ 380 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 381 b = (Mat_SeqAIJ *)(B)->data; 382 383 b->free_a = PETSC_TRUE; 384 b->free_ij = PETSC_TRUE; 385 b->singlemalloc = PETSC_FALSE; 386 387 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 388 389 b->j = bj; 390 b->i = bi; 391 b->diag = bdiag; 392 b->ilen = NULL; 393 b->imax = NULL; 394 b->row = isrow; 395 b->col = iscol; 396 PetscCall(PetscObjectReference((PetscObject)isrow)); 397 PetscCall(PetscObjectReference((PetscObject)iscol)); 398 b->icol = isicol; 399 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 400 401 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 402 b->maxnz = b->nz = bdiag[0] + 1; 403 404 B->factortype = MAT_FACTOR_LU; 405 B->info.factor_mallocs = reallocs; 406 B->info.fill_ratio_given = f; 407 408 if (ai[n]) { 409 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 410 } else { 411 B->info.fill_ratio_needed = 0.0; 412 } 413 #if defined(PETSC_USE_INFO) 414 if (ai[n] != 0) { 415 PetscReal af = B->info.fill_ratio_needed; 416 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 417 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 418 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 419 PetscCall(PetscInfo(A, "for best performance.\n")); 420 } else { 421 PetscCall(PetscInfo(A, "Empty matrix\n")); 422 } 423 #endif 424 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 425 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 426 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 427 PetscFunctionReturn(PETSC_SUCCESS); 428 } 429 430 /* 431 Trouble in factorization, should we dump the original matrix? 432 */ 433 PetscErrorCode MatFactorDumpMatrix(Mat A) 434 { 435 PetscBool flg = PETSC_FALSE; 436 437 PetscFunctionBegin; 438 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 439 if (flg) { 440 PetscViewer viewer; 441 char filename[PETSC_MAX_PATH_LEN]; 442 443 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 444 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 445 PetscCall(MatView(A, viewer)); 446 PetscCall(PetscViewerDestroy(&viewer)); 447 } 448 PetscFunctionReturn(PETSC_SUCCESS); 449 } 450 451 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 452 { 453 Mat C = B; 454 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 455 IS isrow = b->row, isicol = b->icol; 456 const PetscInt *r, *ic, *ics; 457 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 458 PetscInt i, j, k, nz, nzL, row, *pj; 459 const PetscInt *ajtmp, *bjtmp; 460 MatScalar *rtmp, *pc, multiplier, *pv; 461 const MatScalar *aa = a->a, *v; 462 PetscBool row_identity, col_identity; 463 FactorShiftCtx sctx; 464 const PetscInt *ddiag; 465 PetscReal rs; 466 MatScalar d; 467 468 PetscFunctionBegin; 469 /* MatPivotSetUp(): initialize shift context sctx */ 470 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 471 472 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 473 ddiag = a->diag; 474 sctx.shift_top = info->zeropivot; 475 for (i = 0; i < n; i++) { 476 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 477 d = (aa)[ddiag[i]]; 478 rs = -PetscAbsScalar(d) - PetscRealPart(d); 479 v = aa + ai[i]; 480 nz = ai[i + 1] - ai[i]; 481 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 482 if (rs > sctx.shift_top) sctx.shift_top = rs; 483 } 484 sctx.shift_top *= 1.1; 485 sctx.nshift_max = 5; 486 sctx.shift_lo = 0.; 487 sctx.shift_hi = 1.; 488 } 489 490 PetscCall(ISGetIndices(isrow, &r)); 491 PetscCall(ISGetIndices(isicol, &ic)); 492 PetscCall(PetscMalloc1(n + 1, &rtmp)); 493 ics = ic; 494 495 do { 496 sctx.newshift = PETSC_FALSE; 497 for (i = 0; i < n; i++) { 498 /* zero rtmp */ 499 /* L part */ 500 nz = bi[i + 1] - bi[i]; 501 bjtmp = bj + bi[i]; 502 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 503 504 /* U part */ 505 nz = bdiag[i] - bdiag[i + 1]; 506 bjtmp = bj + bdiag[i + 1] + 1; 507 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 508 509 /* load in initial (unfactored row) */ 510 nz = ai[r[i] + 1] - ai[r[i]]; 511 ajtmp = aj + ai[r[i]]; 512 v = aa + ai[r[i]]; 513 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 514 /* ZeropivotApply() */ 515 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 516 517 /* elimination */ 518 bjtmp = bj + bi[i]; 519 row = *bjtmp++; 520 nzL = bi[i + 1] - bi[i]; 521 for (k = 0; k < nzL; k++) { 522 pc = rtmp + row; 523 if (*pc != 0.0) { 524 pv = b->a + bdiag[row]; 525 multiplier = *pc * (*pv); 526 *pc = multiplier; 527 528 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 529 pv = b->a + bdiag[row + 1] + 1; 530 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 531 532 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 533 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 534 } 535 row = *bjtmp++; 536 } 537 538 /* finished row so stick it into b->a */ 539 rs = 0.0; 540 /* L part */ 541 pv = b->a + bi[i]; 542 pj = b->j + bi[i]; 543 nz = bi[i + 1] - bi[i]; 544 for (j = 0; j < nz; j++) { 545 pv[j] = rtmp[pj[j]]; 546 rs += PetscAbsScalar(pv[j]); 547 } 548 549 /* U part */ 550 pv = b->a + bdiag[i + 1] + 1; 551 pj = b->j + bdiag[i + 1] + 1; 552 nz = bdiag[i] - bdiag[i + 1] - 1; 553 for (j = 0; j < nz; j++) { 554 pv[j] = rtmp[pj[j]]; 555 rs += PetscAbsScalar(pv[j]); 556 } 557 558 sctx.rs = rs; 559 sctx.pv = rtmp[i]; 560 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 561 if (sctx.newshift) break; /* break for-loop */ 562 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 563 564 /* Mark diagonal and invert diagonal for simpler triangular solves */ 565 pv = b->a + bdiag[i]; 566 *pv = 1.0 / rtmp[i]; 567 568 } /* endof for (i=0; i<n; i++) { */ 569 570 /* MatPivotRefine() */ 571 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 572 /* 573 * if no shift in this attempt & shifting & started shifting & can refine, 574 * then try lower shift 575 */ 576 sctx.shift_hi = sctx.shift_fraction; 577 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 578 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 579 sctx.newshift = PETSC_TRUE; 580 sctx.nshift++; 581 } 582 } while (sctx.newshift); 583 584 PetscCall(PetscFree(rtmp)); 585 PetscCall(ISRestoreIndices(isicol, &ic)); 586 PetscCall(ISRestoreIndices(isrow, &r)); 587 588 PetscCall(ISIdentity(isrow, &row_identity)); 589 PetscCall(ISIdentity(isicol, &col_identity)); 590 if (b->inode.size) { 591 C->ops->solve = MatSolve_SeqAIJ_Inode; 592 } else if (row_identity && col_identity) { 593 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 594 } else { 595 C->ops->solve = MatSolve_SeqAIJ; 596 } 597 C->ops->solveadd = MatSolveAdd_SeqAIJ; 598 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 599 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 600 C->ops->matsolve = MatMatSolve_SeqAIJ; 601 C->assembled = PETSC_TRUE; 602 C->preallocated = PETSC_TRUE; 603 604 PetscCall(PetscLogFlops(C->cmap->n)); 605 606 /* MatShiftView(A,info,&sctx) */ 607 if (sctx.nshift) { 608 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 609 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 610 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 611 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 612 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 613 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 614 } 615 } 616 PetscFunctionReturn(PETSC_SUCCESS); 617 } 618 619 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 620 { 621 Mat C = B; 622 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 623 IS isrow = b->row, isicol = b->icol; 624 const PetscInt *r, *ic, *ics; 625 PetscInt nz, row, i, j, n = A->rmap->n, diag; 626 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 627 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 628 MatScalar *pv, *rtmp, *pc, multiplier, d; 629 const MatScalar *v, *aa = a->a; 630 PetscReal rs = 0.0; 631 FactorShiftCtx sctx; 632 const PetscInt *ddiag; 633 PetscBool row_identity, col_identity; 634 635 PetscFunctionBegin; 636 /* MatPivotSetUp(): initialize shift context sctx */ 637 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 638 639 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 640 ddiag = a->diag; 641 sctx.shift_top = info->zeropivot; 642 for (i = 0; i < n; i++) { 643 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 644 d = (aa)[ddiag[i]]; 645 rs = -PetscAbsScalar(d) - PetscRealPart(d); 646 v = aa + ai[i]; 647 nz = ai[i + 1] - ai[i]; 648 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 649 if (rs > sctx.shift_top) sctx.shift_top = rs; 650 } 651 sctx.shift_top *= 1.1; 652 sctx.nshift_max = 5; 653 sctx.shift_lo = 0.; 654 sctx.shift_hi = 1.; 655 } 656 657 PetscCall(ISGetIndices(isrow, &r)); 658 PetscCall(ISGetIndices(isicol, &ic)); 659 PetscCall(PetscMalloc1(n + 1, &rtmp)); 660 ics = ic; 661 662 do { 663 sctx.newshift = PETSC_FALSE; 664 for (i = 0; i < n; i++) { 665 nz = bi[i + 1] - bi[i]; 666 bjtmp = bj + bi[i]; 667 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 668 669 /* load in initial (unfactored row) */ 670 nz = ai[r[i] + 1] - ai[r[i]]; 671 ajtmp = aj + ai[r[i]]; 672 v = aa + ai[r[i]]; 673 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 674 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 675 676 row = *bjtmp++; 677 while (row < i) { 678 pc = rtmp + row; 679 if (*pc != 0.0) { 680 pv = b->a + diag_offset[row]; 681 pj = b->j + diag_offset[row] + 1; 682 multiplier = *pc / *pv++; 683 *pc = multiplier; 684 nz = bi[row + 1] - diag_offset[row] - 1; 685 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 686 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 687 } 688 row = *bjtmp++; 689 } 690 /* finished row so stick it into b->a */ 691 pv = b->a + bi[i]; 692 pj = b->j + bi[i]; 693 nz = bi[i + 1] - bi[i]; 694 diag = diag_offset[i] - bi[i]; 695 rs = 0.0; 696 for (j = 0; j < nz; j++) { 697 pv[j] = rtmp[pj[j]]; 698 rs += PetscAbsScalar(pv[j]); 699 } 700 rs -= PetscAbsScalar(pv[diag]); 701 702 sctx.rs = rs; 703 sctx.pv = pv[diag]; 704 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 705 if (sctx.newshift) break; 706 pv[diag] = sctx.pv; 707 } 708 709 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 710 /* 711 * if no shift in this attempt & shifting & started shifting & can refine, 712 * then try lower shift 713 */ 714 sctx.shift_hi = sctx.shift_fraction; 715 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 716 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 717 sctx.newshift = PETSC_TRUE; 718 sctx.nshift++; 719 } 720 } while (sctx.newshift); 721 722 /* invert diagonal entries for simpler triangular solves */ 723 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 724 PetscCall(PetscFree(rtmp)); 725 PetscCall(ISRestoreIndices(isicol, &ic)); 726 PetscCall(ISRestoreIndices(isrow, &r)); 727 728 PetscCall(ISIdentity(isrow, &row_identity)); 729 PetscCall(ISIdentity(isicol, &col_identity)); 730 if (row_identity && col_identity) { 731 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 732 } else { 733 C->ops->solve = MatSolve_SeqAIJ_inplace; 734 } 735 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 736 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 737 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 738 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 739 740 C->assembled = PETSC_TRUE; 741 C->preallocated = PETSC_TRUE; 742 743 PetscCall(PetscLogFlops(C->cmap->n)); 744 if (sctx.nshift) { 745 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 746 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 747 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 748 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 749 } 750 } 751 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 752 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 753 754 PetscCall(MatSeqAIJCheckInode(C)); 755 PetscFunctionReturn(PETSC_SUCCESS); 756 } 757 758 /* 759 This routine implements inplace ILU(0) with row or/and column permutations. 760 Input: 761 A - original matrix 762 Output; 763 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 764 a->j (col index) is permuted by the inverse of colperm, then sorted 765 a->a reordered accordingly with a->j 766 a->diag (ptr to diagonal elements) is updated. 767 */ 768 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 769 { 770 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 771 IS isrow = a->row, isicol = a->icol; 772 const PetscInt *r, *ic, *ics; 773 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 774 PetscInt *ajtmp, nz, row; 775 PetscInt *diag = a->diag, nbdiag, *pj; 776 PetscScalar *rtmp, *pc, multiplier, d; 777 MatScalar *pv, *v; 778 PetscReal rs; 779 FactorShiftCtx sctx; 780 const MatScalar *aa = a->a, *vtmp; 781 782 PetscFunctionBegin; 783 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 784 785 /* MatPivotSetUp(): initialize shift context sctx */ 786 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 787 788 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 789 const PetscInt *ddiag = a->diag; 790 sctx.shift_top = info->zeropivot; 791 for (i = 0; i < n; i++) { 792 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 793 d = (aa)[ddiag[i]]; 794 rs = -PetscAbsScalar(d) - PetscRealPart(d); 795 vtmp = aa + ai[i]; 796 nz = ai[i + 1] - ai[i]; 797 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 798 if (rs > sctx.shift_top) sctx.shift_top = rs; 799 } 800 sctx.shift_top *= 1.1; 801 sctx.nshift_max = 5; 802 sctx.shift_lo = 0.; 803 sctx.shift_hi = 1.; 804 } 805 806 PetscCall(ISGetIndices(isrow, &r)); 807 PetscCall(ISGetIndices(isicol, &ic)); 808 PetscCall(PetscMalloc1(n + 1, &rtmp)); 809 PetscCall(PetscArrayzero(rtmp, n + 1)); 810 ics = ic; 811 812 #if defined(MV) 813 sctx.shift_top = 0.; 814 sctx.nshift_max = 0; 815 sctx.shift_lo = 0.; 816 sctx.shift_hi = 0.; 817 sctx.shift_fraction = 0.; 818 819 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 820 sctx.shift_top = 0.; 821 for (i = 0; i < n; i++) { 822 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 823 d = (a->a)[diag[i]]; 824 rs = -PetscAbsScalar(d) - PetscRealPart(d); 825 v = a->a + ai[i]; 826 nz = ai[i + 1] - ai[i]; 827 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 828 if (rs > sctx.shift_top) sctx.shift_top = rs; 829 } 830 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 831 sctx.shift_top *= 1.1; 832 sctx.nshift_max = 5; 833 sctx.shift_lo = 0.; 834 sctx.shift_hi = 1.; 835 } 836 837 sctx.shift_amount = 0.; 838 sctx.nshift = 0; 839 #endif 840 841 do { 842 sctx.newshift = PETSC_FALSE; 843 for (i = 0; i < n; i++) { 844 /* load in initial unfactored row */ 845 nz = ai[r[i] + 1] - ai[r[i]]; 846 ajtmp = aj + ai[r[i]]; 847 v = a->a + ai[r[i]]; 848 /* sort permuted ajtmp and values v accordingly */ 849 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 850 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 851 852 diag[r[i]] = ai[r[i]]; 853 for (j = 0; j < nz; j++) { 854 rtmp[ajtmp[j]] = v[j]; 855 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 856 } 857 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 858 859 row = *ajtmp++; 860 while (row < i) { 861 pc = rtmp + row; 862 if (*pc != 0.0) { 863 pv = a->a + diag[r[row]]; 864 pj = aj + diag[r[row]] + 1; 865 866 multiplier = *pc / *pv++; 867 *pc = multiplier; 868 nz = ai[r[row] + 1] - diag[r[row]] - 1; 869 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 870 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 871 } 872 row = *ajtmp++; 873 } 874 /* finished row so overwrite it onto a->a */ 875 pv = a->a + ai[r[i]]; 876 pj = aj + ai[r[i]]; 877 nz = ai[r[i] + 1] - ai[r[i]]; 878 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 879 880 rs = 0.0; 881 for (j = 0; j < nz; j++) { 882 pv[j] = rtmp[pj[j]]; 883 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 884 } 885 886 sctx.rs = rs; 887 sctx.pv = pv[nbdiag]; 888 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 889 if (sctx.newshift) break; 890 pv[nbdiag] = sctx.pv; 891 } 892 893 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 894 /* 895 * if no shift in this attempt & shifting & started shifting & can refine, 896 * then try lower shift 897 */ 898 sctx.shift_hi = sctx.shift_fraction; 899 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 900 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 901 sctx.newshift = PETSC_TRUE; 902 sctx.nshift++; 903 } 904 } while (sctx.newshift); 905 906 /* invert diagonal entries for simpler triangular solves */ 907 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 908 909 PetscCall(PetscFree(rtmp)); 910 PetscCall(ISRestoreIndices(isicol, &ic)); 911 PetscCall(ISRestoreIndices(isrow, &r)); 912 913 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 914 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 915 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 916 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 917 918 A->assembled = PETSC_TRUE; 919 A->preallocated = PETSC_TRUE; 920 921 PetscCall(PetscLogFlops(A->cmap->n)); 922 if (sctx.nshift) { 923 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 924 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 925 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 926 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 927 } 928 } 929 PetscFunctionReturn(PETSC_SUCCESS); 930 } 931 932 /* ----------------------------------------------------------- */ 933 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 934 { 935 Mat C; 936 937 PetscFunctionBegin; 938 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 939 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 940 PetscCall(MatLUFactorNumeric(C, A, info)); 941 942 A->ops->solve = C->ops->solve; 943 A->ops->solvetranspose = C->ops->solvetranspose; 944 945 PetscCall(MatHeaderMerge(A, &C)); 946 PetscFunctionReturn(PETSC_SUCCESS); 947 } 948 /* ----------------------------------------------------------- */ 949 950 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 951 { 952 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 953 IS iscol = a->col, isrow = a->row; 954 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 955 PetscInt nz; 956 const PetscInt *rout, *cout, *r, *c; 957 PetscScalar *x, *tmp, *tmps, sum; 958 const PetscScalar *b; 959 const MatScalar *aa = a->a, *v; 960 961 PetscFunctionBegin; 962 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 963 964 PetscCall(VecGetArrayRead(bb, &b)); 965 PetscCall(VecGetArrayWrite(xx, &x)); 966 tmp = a->solve_work; 967 968 PetscCall(ISGetIndices(isrow, &rout)); 969 r = rout; 970 PetscCall(ISGetIndices(iscol, &cout)); 971 c = cout + (n - 1); 972 973 /* forward solve the lower triangular */ 974 tmp[0] = b[*r++]; 975 tmps = tmp; 976 for (i = 1; i < n; i++) { 977 v = aa + ai[i]; 978 vi = aj + ai[i]; 979 nz = a->diag[i] - ai[i]; 980 sum = b[*r++]; 981 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 982 tmp[i] = sum; 983 } 984 985 /* backward solve the upper triangular */ 986 for (i = n - 1; i >= 0; i--) { 987 v = aa + a->diag[i] + 1; 988 vi = aj + a->diag[i] + 1; 989 nz = ai[i + 1] - a->diag[i] - 1; 990 sum = tmp[i]; 991 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 992 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 993 } 994 995 PetscCall(ISRestoreIndices(isrow, &rout)); 996 PetscCall(ISRestoreIndices(iscol, &cout)); 997 PetscCall(VecRestoreArrayRead(bb, &b)); 998 PetscCall(VecRestoreArrayWrite(xx, &x)); 999 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1000 PetscFunctionReturn(PETSC_SUCCESS); 1001 } 1002 1003 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1004 { 1005 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1006 IS iscol = a->col, isrow = a->row; 1007 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1008 PetscInt nz, neq, ldb, ldx; 1009 const PetscInt *rout, *cout, *r, *c; 1010 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1011 const PetscScalar *b, *aa = a->a, *v; 1012 PetscBool isdense; 1013 1014 PetscFunctionBegin; 1015 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1016 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1017 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1018 if (X != B) { 1019 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1020 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1021 } 1022 PetscCall(MatDenseGetArrayRead(B, &b)); 1023 PetscCall(MatDenseGetLDA(B, &ldb)); 1024 PetscCall(MatDenseGetArray(X, &x)); 1025 PetscCall(MatDenseGetLDA(X, &ldx)); 1026 PetscCall(ISGetIndices(isrow, &rout)); 1027 r = rout; 1028 PetscCall(ISGetIndices(iscol, &cout)); 1029 c = cout; 1030 for (neq = 0; neq < B->cmap->n; neq++) { 1031 /* forward solve the lower triangular */ 1032 tmp[0] = b[r[0]]; 1033 tmps = tmp; 1034 for (i = 1; i < n; i++) { 1035 v = aa + ai[i]; 1036 vi = aj + ai[i]; 1037 nz = a->diag[i] - ai[i]; 1038 sum = b[r[i]]; 1039 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1040 tmp[i] = sum; 1041 } 1042 /* backward solve the upper triangular */ 1043 for (i = n - 1; i >= 0; i--) { 1044 v = aa + a->diag[i] + 1; 1045 vi = aj + a->diag[i] + 1; 1046 nz = ai[i + 1] - a->diag[i] - 1; 1047 sum = tmp[i]; 1048 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1049 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1050 } 1051 b += ldb; 1052 x += ldx; 1053 } 1054 PetscCall(ISRestoreIndices(isrow, &rout)); 1055 PetscCall(ISRestoreIndices(iscol, &cout)); 1056 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1057 PetscCall(MatDenseRestoreArray(X, &x)); 1058 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1059 PetscFunctionReturn(PETSC_SUCCESS); 1060 } 1061 1062 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1063 { 1064 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1065 IS iscol = a->col, isrow = a->row; 1066 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1067 PetscInt nz, neq, ldb, ldx; 1068 const PetscInt *rout, *cout, *r, *c; 1069 PetscScalar *x, *tmp = a->solve_work, sum; 1070 const PetscScalar *b, *aa = a->a, *v; 1071 PetscBool isdense; 1072 1073 PetscFunctionBegin; 1074 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1075 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1076 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1077 if (X != B) { 1078 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1079 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1080 } 1081 PetscCall(MatDenseGetArrayRead(B, &b)); 1082 PetscCall(MatDenseGetLDA(B, &ldb)); 1083 PetscCall(MatDenseGetArray(X, &x)); 1084 PetscCall(MatDenseGetLDA(X, &ldx)); 1085 PetscCall(ISGetIndices(isrow, &rout)); 1086 r = rout; 1087 PetscCall(ISGetIndices(iscol, &cout)); 1088 c = cout; 1089 for (neq = 0; neq < B->cmap->n; neq++) { 1090 /* forward solve the lower triangular */ 1091 tmp[0] = b[r[0]]; 1092 v = aa; 1093 vi = aj; 1094 for (i = 1; i < n; i++) { 1095 nz = ai[i + 1] - ai[i]; 1096 sum = b[r[i]]; 1097 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1098 tmp[i] = sum; 1099 v += nz; 1100 vi += nz; 1101 } 1102 /* backward solve the upper triangular */ 1103 for (i = n - 1; i >= 0; i--) { 1104 v = aa + adiag[i + 1] + 1; 1105 vi = aj + adiag[i + 1] + 1; 1106 nz = adiag[i] - adiag[i + 1] - 1; 1107 sum = tmp[i]; 1108 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1109 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1110 } 1111 b += ldb; 1112 x += ldx; 1113 } 1114 PetscCall(ISRestoreIndices(isrow, &rout)); 1115 PetscCall(ISRestoreIndices(iscol, &cout)); 1116 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1117 PetscCall(MatDenseRestoreArray(X, &x)); 1118 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1119 PetscFunctionReturn(PETSC_SUCCESS); 1120 } 1121 1122 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1123 { 1124 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1125 IS iscol = a->col, isrow = a->row; 1126 const PetscInt *r, *c, *rout, *cout; 1127 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1128 PetscInt nz, row; 1129 PetscScalar *x, *tmp, *tmps, sum; 1130 const PetscScalar *b; 1131 const MatScalar *aa = a->a, *v; 1132 1133 PetscFunctionBegin; 1134 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1135 1136 PetscCall(VecGetArrayRead(bb, &b)); 1137 PetscCall(VecGetArrayWrite(xx, &x)); 1138 tmp = a->solve_work; 1139 1140 PetscCall(ISGetIndices(isrow, &rout)); 1141 r = rout; 1142 PetscCall(ISGetIndices(iscol, &cout)); 1143 c = cout + (n - 1); 1144 1145 /* forward solve the lower triangular */ 1146 tmp[0] = b[*r++]; 1147 tmps = tmp; 1148 for (row = 1; row < n; row++) { 1149 i = rout[row]; /* permuted row */ 1150 v = aa + ai[i]; 1151 vi = aj + ai[i]; 1152 nz = a->diag[i] - ai[i]; 1153 sum = b[*r++]; 1154 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1155 tmp[row] = sum; 1156 } 1157 1158 /* backward solve the upper triangular */ 1159 for (row = n - 1; row >= 0; row--) { 1160 i = rout[row]; /* permuted row */ 1161 v = aa + a->diag[i] + 1; 1162 vi = aj + a->diag[i] + 1; 1163 nz = ai[i + 1] - a->diag[i] - 1; 1164 sum = tmp[row]; 1165 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1166 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1167 } 1168 1169 PetscCall(ISRestoreIndices(isrow, &rout)); 1170 PetscCall(ISRestoreIndices(iscol, &cout)); 1171 PetscCall(VecRestoreArrayRead(bb, &b)); 1172 PetscCall(VecRestoreArrayWrite(xx, &x)); 1173 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1174 PetscFunctionReturn(PETSC_SUCCESS); 1175 } 1176 1177 /* ----------------------------------------------------------- */ 1178 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1179 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1180 { 1181 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1182 PetscInt n = A->rmap->n; 1183 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1184 PetscScalar *x; 1185 const PetscScalar *b; 1186 const MatScalar *aa = a->a; 1187 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1188 PetscInt adiag_i, i, nz, ai_i; 1189 const PetscInt *vi; 1190 const MatScalar *v; 1191 PetscScalar sum; 1192 #endif 1193 1194 PetscFunctionBegin; 1195 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1196 1197 PetscCall(VecGetArrayRead(bb, &b)); 1198 PetscCall(VecGetArrayWrite(xx, &x)); 1199 1200 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1201 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1202 #else 1203 /* forward solve the lower triangular */ 1204 x[0] = b[0]; 1205 for (i = 1; i < n; i++) { 1206 ai_i = ai[i]; 1207 v = aa + ai_i; 1208 vi = aj + ai_i; 1209 nz = adiag[i] - ai_i; 1210 sum = b[i]; 1211 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1212 x[i] = sum; 1213 } 1214 1215 /* backward solve the upper triangular */ 1216 for (i = n - 1; i >= 0; i--) { 1217 adiag_i = adiag[i]; 1218 v = aa + adiag_i + 1; 1219 vi = aj + adiag_i + 1; 1220 nz = ai[i + 1] - adiag_i - 1; 1221 sum = x[i]; 1222 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1223 x[i] = sum * aa[adiag_i]; 1224 } 1225 #endif 1226 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1227 PetscCall(VecRestoreArrayRead(bb, &b)); 1228 PetscCall(VecRestoreArrayWrite(xx, &x)); 1229 PetscFunctionReturn(PETSC_SUCCESS); 1230 } 1231 1232 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1233 { 1234 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1235 IS iscol = a->col, isrow = a->row; 1236 PetscInt i, n = A->rmap->n, j; 1237 PetscInt nz; 1238 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1239 PetscScalar *x, *tmp, sum; 1240 const PetscScalar *b; 1241 const MatScalar *aa = a->a, *v; 1242 1243 PetscFunctionBegin; 1244 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1245 1246 PetscCall(VecGetArrayRead(bb, &b)); 1247 PetscCall(VecGetArray(xx, &x)); 1248 tmp = a->solve_work; 1249 1250 PetscCall(ISGetIndices(isrow, &rout)); 1251 r = rout; 1252 PetscCall(ISGetIndices(iscol, &cout)); 1253 c = cout + (n - 1); 1254 1255 /* forward solve the lower triangular */ 1256 tmp[0] = b[*r++]; 1257 for (i = 1; i < n; i++) { 1258 v = aa + ai[i]; 1259 vi = aj + ai[i]; 1260 nz = a->diag[i] - ai[i]; 1261 sum = b[*r++]; 1262 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1263 tmp[i] = sum; 1264 } 1265 1266 /* backward solve the upper triangular */ 1267 for (i = n - 1; i >= 0; i--) { 1268 v = aa + a->diag[i] + 1; 1269 vi = aj + a->diag[i] + 1; 1270 nz = ai[i + 1] - a->diag[i] - 1; 1271 sum = tmp[i]; 1272 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1273 tmp[i] = sum * aa[a->diag[i]]; 1274 x[*c--] += tmp[i]; 1275 } 1276 1277 PetscCall(ISRestoreIndices(isrow, &rout)); 1278 PetscCall(ISRestoreIndices(iscol, &cout)); 1279 PetscCall(VecRestoreArrayRead(bb, &b)); 1280 PetscCall(VecRestoreArray(xx, &x)); 1281 PetscCall(PetscLogFlops(2.0 * a->nz)); 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 1285 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1286 { 1287 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1288 IS iscol = a->col, isrow = a->row; 1289 PetscInt i, n = A->rmap->n, j; 1290 PetscInt nz; 1291 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1292 PetscScalar *x, *tmp, sum; 1293 const PetscScalar *b; 1294 const MatScalar *aa = a->a, *v; 1295 1296 PetscFunctionBegin; 1297 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1298 1299 PetscCall(VecGetArrayRead(bb, &b)); 1300 PetscCall(VecGetArray(xx, &x)); 1301 tmp = a->solve_work; 1302 1303 PetscCall(ISGetIndices(isrow, &rout)); 1304 r = rout; 1305 PetscCall(ISGetIndices(iscol, &cout)); 1306 c = cout; 1307 1308 /* forward solve the lower triangular */ 1309 tmp[0] = b[r[0]]; 1310 v = aa; 1311 vi = aj; 1312 for (i = 1; i < n; i++) { 1313 nz = ai[i + 1] - ai[i]; 1314 sum = b[r[i]]; 1315 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1316 tmp[i] = sum; 1317 v += nz; 1318 vi += nz; 1319 } 1320 1321 /* backward solve the upper triangular */ 1322 v = aa + adiag[n - 1]; 1323 vi = aj + adiag[n - 1]; 1324 for (i = n - 1; i >= 0; i--) { 1325 nz = adiag[i] - adiag[i + 1] - 1; 1326 sum = tmp[i]; 1327 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1328 tmp[i] = sum * v[nz]; 1329 x[c[i]] += tmp[i]; 1330 v += nz + 1; 1331 vi += nz + 1; 1332 } 1333 1334 PetscCall(ISRestoreIndices(isrow, &rout)); 1335 PetscCall(ISRestoreIndices(iscol, &cout)); 1336 PetscCall(VecRestoreArrayRead(bb, &b)); 1337 PetscCall(VecRestoreArray(xx, &x)); 1338 PetscCall(PetscLogFlops(2.0 * a->nz)); 1339 PetscFunctionReturn(PETSC_SUCCESS); 1340 } 1341 1342 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1343 { 1344 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1345 IS iscol = a->col, isrow = a->row; 1346 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1347 PetscInt i, n = A->rmap->n, j; 1348 PetscInt nz; 1349 PetscScalar *x, *tmp, s1; 1350 const MatScalar *aa = a->a, *v; 1351 const PetscScalar *b; 1352 1353 PetscFunctionBegin; 1354 PetscCall(VecGetArrayRead(bb, &b)); 1355 PetscCall(VecGetArrayWrite(xx, &x)); 1356 tmp = a->solve_work; 1357 1358 PetscCall(ISGetIndices(isrow, &rout)); 1359 r = rout; 1360 PetscCall(ISGetIndices(iscol, &cout)); 1361 c = cout; 1362 1363 /* copy the b into temp work space according to permutation */ 1364 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1365 1366 /* forward solve the U^T */ 1367 for (i = 0; i < n; i++) { 1368 v = aa + diag[i]; 1369 vi = aj + diag[i] + 1; 1370 nz = ai[i + 1] - diag[i] - 1; 1371 s1 = tmp[i]; 1372 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1373 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1374 tmp[i] = s1; 1375 } 1376 1377 /* backward solve the L^T */ 1378 for (i = n - 1; i >= 0; i--) { 1379 v = aa + diag[i] - 1; 1380 vi = aj + diag[i] - 1; 1381 nz = diag[i] - ai[i]; 1382 s1 = tmp[i]; 1383 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1384 } 1385 1386 /* copy tmp into x according to permutation */ 1387 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1388 1389 PetscCall(ISRestoreIndices(isrow, &rout)); 1390 PetscCall(ISRestoreIndices(iscol, &cout)); 1391 PetscCall(VecRestoreArrayRead(bb, &b)); 1392 PetscCall(VecRestoreArrayWrite(xx, &x)); 1393 1394 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1395 PetscFunctionReturn(PETSC_SUCCESS); 1396 } 1397 1398 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1399 { 1400 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1401 IS iscol = a->col, isrow = a->row; 1402 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1403 PetscInt i, n = A->rmap->n, j; 1404 PetscInt nz; 1405 PetscScalar *x, *tmp, s1; 1406 const MatScalar *aa = a->a, *v; 1407 const PetscScalar *b; 1408 1409 PetscFunctionBegin; 1410 PetscCall(VecGetArrayRead(bb, &b)); 1411 PetscCall(VecGetArrayWrite(xx, &x)); 1412 tmp = a->solve_work; 1413 1414 PetscCall(ISGetIndices(isrow, &rout)); 1415 r = rout; 1416 PetscCall(ISGetIndices(iscol, &cout)); 1417 c = cout; 1418 1419 /* copy the b into temp work space according to permutation */ 1420 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1421 1422 /* forward solve the U^T */ 1423 for (i = 0; i < n; i++) { 1424 v = aa + adiag[i + 1] + 1; 1425 vi = aj + adiag[i + 1] + 1; 1426 nz = adiag[i] - adiag[i + 1] - 1; 1427 s1 = tmp[i]; 1428 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1429 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1430 tmp[i] = s1; 1431 } 1432 1433 /* backward solve the L^T */ 1434 for (i = n - 1; i >= 0; i--) { 1435 v = aa + ai[i]; 1436 vi = aj + ai[i]; 1437 nz = ai[i + 1] - ai[i]; 1438 s1 = tmp[i]; 1439 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1440 } 1441 1442 /* copy tmp into x according to permutation */ 1443 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1444 1445 PetscCall(ISRestoreIndices(isrow, &rout)); 1446 PetscCall(ISRestoreIndices(iscol, &cout)); 1447 PetscCall(VecRestoreArrayRead(bb, &b)); 1448 PetscCall(VecRestoreArrayWrite(xx, &x)); 1449 1450 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1451 PetscFunctionReturn(PETSC_SUCCESS); 1452 } 1453 1454 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1455 { 1456 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1457 IS iscol = a->col, isrow = a->row; 1458 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1459 PetscInt i, n = A->rmap->n, j; 1460 PetscInt nz; 1461 PetscScalar *x, *tmp, s1; 1462 const MatScalar *aa = a->a, *v; 1463 const PetscScalar *b; 1464 1465 PetscFunctionBegin; 1466 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1467 PetscCall(VecGetArrayRead(bb, &b)); 1468 PetscCall(VecGetArray(xx, &x)); 1469 tmp = a->solve_work; 1470 1471 PetscCall(ISGetIndices(isrow, &rout)); 1472 r = rout; 1473 PetscCall(ISGetIndices(iscol, &cout)); 1474 c = cout; 1475 1476 /* copy the b into temp work space according to permutation */ 1477 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1478 1479 /* forward solve the U^T */ 1480 for (i = 0; i < n; i++) { 1481 v = aa + diag[i]; 1482 vi = aj + diag[i] + 1; 1483 nz = ai[i + 1] - diag[i] - 1; 1484 s1 = tmp[i]; 1485 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1486 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1487 tmp[i] = s1; 1488 } 1489 1490 /* backward solve the L^T */ 1491 for (i = n - 1; i >= 0; i--) { 1492 v = aa + diag[i] - 1; 1493 vi = aj + diag[i] - 1; 1494 nz = diag[i] - ai[i]; 1495 s1 = tmp[i]; 1496 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1497 } 1498 1499 /* copy tmp into x according to permutation */ 1500 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1501 1502 PetscCall(ISRestoreIndices(isrow, &rout)); 1503 PetscCall(ISRestoreIndices(iscol, &cout)); 1504 PetscCall(VecRestoreArrayRead(bb, &b)); 1505 PetscCall(VecRestoreArray(xx, &x)); 1506 1507 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1508 PetscFunctionReturn(PETSC_SUCCESS); 1509 } 1510 1511 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1512 { 1513 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1514 IS iscol = a->col, isrow = a->row; 1515 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1516 PetscInt i, n = A->rmap->n, j; 1517 PetscInt nz; 1518 PetscScalar *x, *tmp, s1; 1519 const MatScalar *aa = a->a, *v; 1520 const PetscScalar *b; 1521 1522 PetscFunctionBegin; 1523 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1524 PetscCall(VecGetArrayRead(bb, &b)); 1525 PetscCall(VecGetArray(xx, &x)); 1526 tmp = a->solve_work; 1527 1528 PetscCall(ISGetIndices(isrow, &rout)); 1529 r = rout; 1530 PetscCall(ISGetIndices(iscol, &cout)); 1531 c = cout; 1532 1533 /* copy the b into temp work space according to permutation */ 1534 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1535 1536 /* forward solve the U^T */ 1537 for (i = 0; i < n; i++) { 1538 v = aa + adiag[i + 1] + 1; 1539 vi = aj + adiag[i + 1] + 1; 1540 nz = adiag[i] - adiag[i + 1] - 1; 1541 s1 = tmp[i]; 1542 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1543 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1544 tmp[i] = s1; 1545 } 1546 1547 /* backward solve the L^T */ 1548 for (i = n - 1; i >= 0; i--) { 1549 v = aa + ai[i]; 1550 vi = aj + ai[i]; 1551 nz = ai[i + 1] - ai[i]; 1552 s1 = tmp[i]; 1553 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1554 } 1555 1556 /* copy tmp into x according to permutation */ 1557 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1558 1559 PetscCall(ISRestoreIndices(isrow, &rout)); 1560 PetscCall(ISRestoreIndices(iscol, &cout)); 1561 PetscCall(VecRestoreArrayRead(bb, &b)); 1562 PetscCall(VecRestoreArray(xx, &x)); 1563 1564 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1565 PetscFunctionReturn(PETSC_SUCCESS); 1566 } 1567 1568 /* ----------------------------------------------------------------*/ 1569 1570 /* 1571 ilu() under revised new data structure. 1572 Factored arrays bj and ba are stored as 1573 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1574 1575 bi=fact->i is an array of size n+1, in which 1576 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1577 bi[n]: points to L(n-1,n-1)+1 1578 1579 bdiag=fact->diag is an array of size n+1,in which 1580 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1581 bdiag[n]: points to entry of U(n-1,0)-1 1582 1583 U(i,:) contains bdiag[i] as its last entry, i.e., 1584 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1585 */ 1586 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1587 { 1588 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1589 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1590 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1591 IS isicol; 1592 1593 PetscFunctionBegin; 1594 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1595 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1596 b = (Mat_SeqAIJ *)(fact)->data; 1597 1598 /* allocate matrix arrays for new data structure */ 1599 PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 1600 1601 b->singlemalloc = PETSC_TRUE; 1602 if (!b->diag) { PetscCall(PetscMalloc1(n + 1, &b->diag)); } 1603 bdiag = b->diag; 1604 1605 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1606 1607 /* set bi and bj with new data structure */ 1608 bi = b->i; 1609 bj = b->j; 1610 1611 /* L part */ 1612 bi[0] = 0; 1613 for (i = 0; i < n; i++) { 1614 nz = adiag[i] - ai[i]; 1615 bi[i + 1] = bi[i] + nz; 1616 aj = a->j + ai[i]; 1617 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1618 } 1619 1620 /* U part */ 1621 bdiag[n] = bi[n] - 1; 1622 for (i = n - 1; i >= 0; i--) { 1623 nz = ai[i + 1] - adiag[i] - 1; 1624 aj = a->j + adiag[i] + 1; 1625 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1626 /* diag[i] */ 1627 bj[k++] = i; 1628 bdiag[i] = bdiag[i + 1] + nz + 1; 1629 } 1630 1631 fact->factortype = MAT_FACTOR_ILU; 1632 fact->info.factor_mallocs = 0; 1633 fact->info.fill_ratio_given = info->fill; 1634 fact->info.fill_ratio_needed = 1.0; 1635 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1636 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1637 1638 b = (Mat_SeqAIJ *)(fact)->data; 1639 b->row = isrow; 1640 b->col = iscol; 1641 b->icol = isicol; 1642 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1643 PetscCall(PetscObjectReference((PetscObject)isrow)); 1644 PetscCall(PetscObjectReference((PetscObject)iscol)); 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1649 { 1650 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1651 IS isicol; 1652 const PetscInt *r, *ic; 1653 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1654 PetscInt *bi, *cols, nnz, *cols_lvl; 1655 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1656 PetscInt i, levels, diagonal_fill; 1657 PetscBool col_identity, row_identity, missing; 1658 PetscReal f; 1659 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1660 PetscBT lnkbt; 1661 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1662 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1663 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1664 1665 PetscFunctionBegin; 1666 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1667 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1668 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1669 1670 levels = (PetscInt)info->levels; 1671 PetscCall(ISIdentity(isrow, &row_identity)); 1672 PetscCall(ISIdentity(iscol, &col_identity)); 1673 if (!levels && row_identity && col_identity) { 1674 /* special case: ilu(0) with natural ordering */ 1675 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1676 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1677 PetscFunctionReturn(PETSC_SUCCESS); 1678 } 1679 1680 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1681 PetscCall(ISGetIndices(isrow, &r)); 1682 PetscCall(ISGetIndices(isicol, &ic)); 1683 1684 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1685 PetscCall(PetscMalloc1(n + 1, &bi)); 1686 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1687 bi[0] = bdiag[0] = 0; 1688 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1689 1690 /* create a linked list for storing column indices of the active row */ 1691 nlnk = n + 1; 1692 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1693 1694 /* initial FreeSpace size is f*(ai[n]+1) */ 1695 f = info->fill; 1696 diagonal_fill = (PetscInt)info->diagonal_fill; 1697 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1698 current_space = free_space; 1699 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1700 current_space_lvl = free_space_lvl; 1701 for (i = 0; i < n; i++) { 1702 nzi = 0; 1703 /* copy current row into linked list */ 1704 nnz = ai[r[i] + 1] - ai[r[i]]; 1705 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1706 cols = aj + ai[r[i]]; 1707 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1708 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1709 nzi += nlnk; 1710 1711 /* make sure diagonal entry is included */ 1712 if (diagonal_fill && lnk[i] == -1) { 1713 fm = n; 1714 while (lnk[fm] < i) fm = lnk[fm]; 1715 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1716 lnk[fm] = i; 1717 lnk_lvl[i] = 0; 1718 nzi++; 1719 dcount++; 1720 } 1721 1722 /* add pivot rows into the active row */ 1723 nzbd = 0; 1724 prow = lnk[n]; 1725 while (prow < i) { 1726 nnz = bdiag[prow]; 1727 cols = bj_ptr[prow] + nnz + 1; 1728 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1729 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1730 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1731 nzi += nlnk; 1732 prow = lnk[prow]; 1733 nzbd++; 1734 } 1735 bdiag[i] = nzbd; 1736 bi[i + 1] = bi[i] + nzi; 1737 /* if free space is not available, make more free space */ 1738 if (current_space->local_remaining < nzi) { 1739 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1740 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1741 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1742 reallocs++; 1743 } 1744 1745 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1746 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1747 bj_ptr[i] = current_space->array; 1748 bjlvl_ptr[i] = current_space_lvl->array; 1749 1750 /* make sure the active row i has diagonal entry */ 1751 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1752 1753 current_space->array += nzi; 1754 current_space->local_used += nzi; 1755 current_space->local_remaining -= nzi; 1756 current_space_lvl->array += nzi; 1757 current_space_lvl->local_used += nzi; 1758 current_space_lvl->local_remaining -= nzi; 1759 } 1760 1761 PetscCall(ISRestoreIndices(isrow, &r)); 1762 PetscCall(ISRestoreIndices(isicol, &ic)); 1763 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1764 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1765 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1766 1767 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1768 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1769 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1770 1771 #if defined(PETSC_USE_INFO) 1772 { 1773 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1774 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1775 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1776 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1777 PetscCall(PetscInfo(A, "for best performance.\n")); 1778 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1779 } 1780 #endif 1781 /* put together the new matrix */ 1782 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1783 b = (Mat_SeqAIJ *)(fact)->data; 1784 1785 b->free_a = PETSC_TRUE; 1786 b->free_ij = PETSC_TRUE; 1787 b->singlemalloc = PETSC_FALSE; 1788 1789 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 1790 1791 b->j = bj; 1792 b->i = bi; 1793 b->diag = bdiag; 1794 b->ilen = NULL; 1795 b->imax = NULL; 1796 b->row = isrow; 1797 b->col = iscol; 1798 PetscCall(PetscObjectReference((PetscObject)isrow)); 1799 PetscCall(PetscObjectReference((PetscObject)iscol)); 1800 b->icol = isicol; 1801 1802 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1803 /* In b structure: Free imax, ilen, old a, old j. 1804 Allocate bdiag, solve_work, new a, new j */ 1805 b->maxnz = b->nz = bdiag[0] + 1; 1806 1807 (fact)->info.factor_mallocs = reallocs; 1808 (fact)->info.fill_ratio_given = f; 1809 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1810 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1811 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1812 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1817 { 1818 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1819 IS isicol; 1820 const PetscInt *r, *ic; 1821 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1822 PetscInt *bi, *cols, nnz, *cols_lvl; 1823 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1824 PetscInt i, levels, diagonal_fill; 1825 PetscBool col_identity, row_identity; 1826 PetscReal f; 1827 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1828 PetscBT lnkbt; 1829 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1830 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1831 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1832 PetscBool missing; 1833 1834 PetscFunctionBegin; 1835 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1836 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1837 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1838 1839 f = info->fill; 1840 levels = (PetscInt)info->levels; 1841 diagonal_fill = (PetscInt)info->diagonal_fill; 1842 1843 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1844 1845 PetscCall(ISIdentity(isrow, &row_identity)); 1846 PetscCall(ISIdentity(iscol, &col_identity)); 1847 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1848 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1849 1850 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1851 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1852 fact->factortype = MAT_FACTOR_ILU; 1853 (fact)->info.factor_mallocs = 0; 1854 (fact)->info.fill_ratio_given = info->fill; 1855 (fact)->info.fill_ratio_needed = 1.0; 1856 1857 b = (Mat_SeqAIJ *)(fact)->data; 1858 b->row = isrow; 1859 b->col = iscol; 1860 b->icol = isicol; 1861 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1862 PetscCall(PetscObjectReference((PetscObject)isrow)); 1863 PetscCall(PetscObjectReference((PetscObject)iscol)); 1864 PetscFunctionReturn(PETSC_SUCCESS); 1865 } 1866 1867 PetscCall(ISGetIndices(isrow, &r)); 1868 PetscCall(ISGetIndices(isicol, &ic)); 1869 1870 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1871 PetscCall(PetscMalloc1(n + 1, &bi)); 1872 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1873 bi[0] = bdiag[0] = 0; 1874 1875 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1876 1877 /* create a linked list for storing column indices of the active row */ 1878 nlnk = n + 1; 1879 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1880 1881 /* initial FreeSpace size is f*(ai[n]+1) */ 1882 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1883 current_space = free_space; 1884 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1885 current_space_lvl = free_space_lvl; 1886 1887 for (i = 0; i < n; i++) { 1888 nzi = 0; 1889 /* copy current row into linked list */ 1890 nnz = ai[r[i] + 1] - ai[r[i]]; 1891 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1892 cols = aj + ai[r[i]]; 1893 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1894 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1895 nzi += nlnk; 1896 1897 /* make sure diagonal entry is included */ 1898 if (diagonal_fill && lnk[i] == -1) { 1899 fm = n; 1900 while (lnk[fm] < i) fm = lnk[fm]; 1901 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1902 lnk[fm] = i; 1903 lnk_lvl[i] = 0; 1904 nzi++; 1905 dcount++; 1906 } 1907 1908 /* add pivot rows into the active row */ 1909 nzbd = 0; 1910 prow = lnk[n]; 1911 while (prow < i) { 1912 nnz = bdiag[prow]; 1913 cols = bj_ptr[prow] + nnz + 1; 1914 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1915 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1916 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1917 nzi += nlnk; 1918 prow = lnk[prow]; 1919 nzbd++; 1920 } 1921 bdiag[i] = nzbd; 1922 bi[i + 1] = bi[i] + nzi; 1923 1924 /* if free space is not available, make more free space */ 1925 if (current_space->local_remaining < nzi) { 1926 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1927 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1928 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1929 reallocs++; 1930 } 1931 1932 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1933 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1934 bj_ptr[i] = current_space->array; 1935 bjlvl_ptr[i] = current_space_lvl->array; 1936 1937 /* make sure the active row i has diagonal entry */ 1938 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1939 1940 current_space->array += nzi; 1941 current_space->local_used += nzi; 1942 current_space->local_remaining -= nzi; 1943 current_space_lvl->array += nzi; 1944 current_space_lvl->local_used += nzi; 1945 current_space_lvl->local_remaining -= nzi; 1946 } 1947 1948 PetscCall(ISRestoreIndices(isrow, &r)); 1949 PetscCall(ISRestoreIndices(isicol, &ic)); 1950 1951 /* destroy list of free space and other temporary arrays */ 1952 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1953 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 1954 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1955 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1956 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1957 1958 #if defined(PETSC_USE_INFO) 1959 { 1960 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1961 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1962 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1963 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1964 PetscCall(PetscInfo(A, "for best performance.\n")); 1965 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1966 } 1967 #endif 1968 1969 /* put together the new matrix */ 1970 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1971 b = (Mat_SeqAIJ *)(fact)->data; 1972 1973 b->free_a = PETSC_TRUE; 1974 b->free_ij = PETSC_TRUE; 1975 b->singlemalloc = PETSC_FALSE; 1976 1977 PetscCall(PetscMalloc1(bi[n], &b->a)); 1978 b->j = bj; 1979 b->i = bi; 1980 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 1981 b->diag = bdiag; 1982 b->ilen = NULL; 1983 b->imax = NULL; 1984 b->row = isrow; 1985 b->col = iscol; 1986 PetscCall(PetscObjectReference((PetscObject)isrow)); 1987 PetscCall(PetscObjectReference((PetscObject)iscol)); 1988 b->icol = isicol; 1989 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1990 /* In b structure: Free imax, ilen, old a, old j. 1991 Allocate bdiag, solve_work, new a, new j */ 1992 b->maxnz = b->nz = bi[n]; 1993 1994 (fact)->info.factor_mallocs = reallocs; 1995 (fact)->info.fill_ratio_given = f; 1996 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1997 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1998 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1999 PetscFunctionReturn(PETSC_SUCCESS); 2000 } 2001 2002 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2003 { 2004 Mat C = B; 2005 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2006 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2007 IS ip = b->row, iip = b->icol; 2008 const PetscInt *rip, *riip; 2009 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2010 PetscInt *ai = a->i, *aj = a->j; 2011 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2012 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2013 PetscBool perm_identity; 2014 FactorShiftCtx sctx; 2015 PetscReal rs; 2016 MatScalar d, *v; 2017 2018 PetscFunctionBegin; 2019 /* MatPivotSetUp(): initialize shift context sctx */ 2020 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2021 2022 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2023 sctx.shift_top = info->zeropivot; 2024 for (i = 0; i < mbs; i++) { 2025 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2026 d = (aa)[a->diag[i]]; 2027 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2028 v = aa + ai[i]; 2029 nz = ai[i + 1] - ai[i]; 2030 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2031 if (rs > sctx.shift_top) sctx.shift_top = rs; 2032 } 2033 sctx.shift_top *= 1.1; 2034 sctx.nshift_max = 5; 2035 sctx.shift_lo = 0.; 2036 sctx.shift_hi = 1.; 2037 } 2038 2039 PetscCall(ISGetIndices(ip, &rip)); 2040 PetscCall(ISGetIndices(iip, &riip)); 2041 2042 /* allocate working arrays 2043 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2044 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2045 */ 2046 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2047 2048 do { 2049 sctx.newshift = PETSC_FALSE; 2050 2051 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2052 if (mbs) il[0] = 0; 2053 2054 for (k = 0; k < mbs; k++) { 2055 /* zero rtmp */ 2056 nz = bi[k + 1] - bi[k]; 2057 bjtmp = bj + bi[k]; 2058 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2059 2060 /* load in initial unfactored row */ 2061 bval = ba + bi[k]; 2062 jmin = ai[rip[k]]; 2063 jmax = ai[rip[k] + 1]; 2064 for (j = jmin; j < jmax; j++) { 2065 col = riip[aj[j]]; 2066 if (col >= k) { /* only take upper triangular entry */ 2067 rtmp[col] = aa[j]; 2068 *bval++ = 0.0; /* for in-place factorization */ 2069 } 2070 } 2071 /* shift the diagonal of the matrix: ZeropivotApply() */ 2072 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2073 2074 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2075 dk = rtmp[k]; 2076 i = c2r[k]; /* first row to be added to k_th row */ 2077 2078 while (i < k) { 2079 nexti = c2r[i]; /* next row to be added to k_th row */ 2080 2081 /* compute multiplier, update diag(k) and U(i,k) */ 2082 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2083 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2084 dk += uikdi * ba[ili]; /* update diag[k] */ 2085 ba[ili] = uikdi; /* -U(i,k) */ 2086 2087 /* add multiple of row i to k-th row */ 2088 jmin = ili + 1; 2089 jmax = bi[i + 1]; 2090 if (jmin < jmax) { 2091 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2092 /* update il and c2r for row i */ 2093 il[i] = jmin; 2094 j = bj[jmin]; 2095 c2r[i] = c2r[j]; 2096 c2r[j] = i; 2097 } 2098 i = nexti; 2099 } 2100 2101 /* copy data into U(k,:) */ 2102 rs = 0.0; 2103 jmin = bi[k]; 2104 jmax = bi[k + 1] - 1; 2105 if (jmin < jmax) { 2106 for (j = jmin; j < jmax; j++) { 2107 col = bj[j]; 2108 ba[j] = rtmp[col]; 2109 rs += PetscAbsScalar(ba[j]); 2110 } 2111 /* add the k-th row into il and c2r */ 2112 il[k] = jmin; 2113 i = bj[jmin]; 2114 c2r[k] = c2r[i]; 2115 c2r[i] = k; 2116 } 2117 2118 /* MatPivotCheck() */ 2119 sctx.rs = rs; 2120 sctx.pv = dk; 2121 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2122 if (sctx.newshift) break; 2123 dk = sctx.pv; 2124 2125 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2126 } 2127 } while (sctx.newshift); 2128 2129 PetscCall(PetscFree3(rtmp, il, c2r)); 2130 PetscCall(ISRestoreIndices(ip, &rip)); 2131 PetscCall(ISRestoreIndices(iip, &riip)); 2132 2133 PetscCall(ISIdentity(ip, &perm_identity)); 2134 if (perm_identity) { 2135 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2136 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2137 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2138 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2139 } else { 2140 B->ops->solve = MatSolve_SeqSBAIJ_1; 2141 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2142 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2143 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2144 } 2145 2146 C->assembled = PETSC_TRUE; 2147 C->preallocated = PETSC_TRUE; 2148 2149 PetscCall(PetscLogFlops(C->rmap->n)); 2150 2151 /* MatPivotView() */ 2152 if (sctx.nshift) { 2153 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2154 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2155 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2156 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2157 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2158 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2159 } 2160 } 2161 PetscFunctionReturn(PETSC_SUCCESS); 2162 } 2163 2164 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2165 { 2166 Mat C = B; 2167 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2168 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2169 IS ip = b->row, iip = b->icol; 2170 const PetscInt *rip, *riip; 2171 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2172 PetscInt *ai = a->i, *aj = a->j; 2173 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2174 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2175 PetscBool perm_identity; 2176 FactorShiftCtx sctx; 2177 PetscReal rs; 2178 MatScalar d, *v; 2179 2180 PetscFunctionBegin; 2181 /* MatPivotSetUp(): initialize shift context sctx */ 2182 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2183 2184 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2185 sctx.shift_top = info->zeropivot; 2186 for (i = 0; i < mbs; i++) { 2187 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2188 d = (aa)[a->diag[i]]; 2189 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2190 v = aa + ai[i]; 2191 nz = ai[i + 1] - ai[i]; 2192 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2193 if (rs > sctx.shift_top) sctx.shift_top = rs; 2194 } 2195 sctx.shift_top *= 1.1; 2196 sctx.nshift_max = 5; 2197 sctx.shift_lo = 0.; 2198 sctx.shift_hi = 1.; 2199 } 2200 2201 PetscCall(ISGetIndices(ip, &rip)); 2202 PetscCall(ISGetIndices(iip, &riip)); 2203 2204 /* initialization */ 2205 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2206 2207 do { 2208 sctx.newshift = PETSC_FALSE; 2209 2210 for (i = 0; i < mbs; i++) jl[i] = mbs; 2211 il[0] = 0; 2212 2213 for (k = 0; k < mbs; k++) { 2214 /* zero rtmp */ 2215 nz = bi[k + 1] - bi[k]; 2216 bjtmp = bj + bi[k]; 2217 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2218 2219 bval = ba + bi[k]; 2220 /* initialize k-th row by the perm[k]-th row of A */ 2221 jmin = ai[rip[k]]; 2222 jmax = ai[rip[k] + 1]; 2223 for (j = jmin; j < jmax; j++) { 2224 col = riip[aj[j]]; 2225 if (col >= k) { /* only take upper triangular entry */ 2226 rtmp[col] = aa[j]; 2227 *bval++ = 0.0; /* for in-place factorization */ 2228 } 2229 } 2230 /* shift the diagonal of the matrix */ 2231 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2232 2233 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2234 dk = rtmp[k]; 2235 i = jl[k]; /* first row to be added to k_th row */ 2236 2237 while (i < k) { 2238 nexti = jl[i]; /* next row to be added to k_th row */ 2239 2240 /* compute multiplier, update diag(k) and U(i,k) */ 2241 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2242 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2243 dk += uikdi * ba[ili]; 2244 ba[ili] = uikdi; /* -U(i,k) */ 2245 2246 /* add multiple of row i to k-th row */ 2247 jmin = ili + 1; 2248 jmax = bi[i + 1]; 2249 if (jmin < jmax) { 2250 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2251 /* update il and jl for row i */ 2252 il[i] = jmin; 2253 j = bj[jmin]; 2254 jl[i] = jl[j]; 2255 jl[j] = i; 2256 } 2257 i = nexti; 2258 } 2259 2260 /* shift the diagonals when zero pivot is detected */ 2261 /* compute rs=sum of abs(off-diagonal) */ 2262 rs = 0.0; 2263 jmin = bi[k] + 1; 2264 nz = bi[k + 1] - jmin; 2265 bcol = bj + jmin; 2266 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2267 2268 sctx.rs = rs; 2269 sctx.pv = dk; 2270 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2271 if (sctx.newshift) break; 2272 dk = sctx.pv; 2273 2274 /* copy data into U(k,:) */ 2275 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2276 jmin = bi[k] + 1; 2277 jmax = bi[k + 1]; 2278 if (jmin < jmax) { 2279 for (j = jmin; j < jmax; j++) { 2280 col = bj[j]; 2281 ba[j] = rtmp[col]; 2282 } 2283 /* add the k-th row into il and jl */ 2284 il[k] = jmin; 2285 i = bj[jmin]; 2286 jl[k] = jl[i]; 2287 jl[i] = k; 2288 } 2289 } 2290 } while (sctx.newshift); 2291 2292 PetscCall(PetscFree3(rtmp, il, jl)); 2293 PetscCall(ISRestoreIndices(ip, &rip)); 2294 PetscCall(ISRestoreIndices(iip, &riip)); 2295 2296 PetscCall(ISIdentity(ip, &perm_identity)); 2297 if (perm_identity) { 2298 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2299 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2300 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2301 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2302 } else { 2303 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2304 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2305 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2306 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2307 } 2308 2309 C->assembled = PETSC_TRUE; 2310 C->preallocated = PETSC_TRUE; 2311 2312 PetscCall(PetscLogFlops(C->rmap->n)); 2313 if (sctx.nshift) { 2314 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2315 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2316 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2317 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2318 } 2319 } 2320 PetscFunctionReturn(PETSC_SUCCESS); 2321 } 2322 2323 /* 2324 icc() under revised new data structure. 2325 Factored arrays bj and ba are stored as 2326 U(0,:),...,U(i,:),U(n-1,:) 2327 2328 ui=fact->i is an array of size n+1, in which 2329 ui+ 2330 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2331 ui[n]: points to U(n-1,n-1)+1 2332 2333 udiag=fact->diag is an array of size n,in which 2334 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2335 2336 U(i,:) contains udiag[i] as its last entry, i.e., 2337 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2338 */ 2339 2340 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2341 { 2342 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2343 Mat_SeqSBAIJ *b; 2344 PetscBool perm_identity, missing; 2345 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2346 const PetscInt *rip, *riip; 2347 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2348 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2349 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2350 PetscReal fill = info->fill, levels = info->levels; 2351 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2352 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2353 PetscBT lnkbt; 2354 IS iperm; 2355 2356 PetscFunctionBegin; 2357 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2358 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2359 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2360 PetscCall(ISIdentity(perm, &perm_identity)); 2361 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2362 2363 PetscCall(PetscMalloc1(am + 1, &ui)); 2364 PetscCall(PetscMalloc1(am + 1, &udiag)); 2365 ui[0] = 0; 2366 2367 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2368 if (!levels && perm_identity) { 2369 for (i = 0; i < am; i++) { 2370 ncols = ai[i + 1] - a->diag[i]; 2371 ui[i + 1] = ui[i] + ncols; 2372 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2373 } 2374 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2375 cols = uj; 2376 for (i = 0; i < am; i++) { 2377 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2378 ncols = ai[i + 1] - a->diag[i] - 1; 2379 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2380 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2381 } 2382 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2383 PetscCall(ISGetIndices(iperm, &riip)); 2384 PetscCall(ISGetIndices(perm, &rip)); 2385 2386 /* initialization */ 2387 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2388 2389 /* jl: linked list for storing indices of the pivot rows 2390 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2391 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2392 for (i = 0; i < am; i++) { 2393 jl[i] = am; 2394 il[i] = 0; 2395 } 2396 2397 /* create and initialize a linked list for storing column indices of the active row k */ 2398 nlnk = am + 1; 2399 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2400 2401 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2402 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2403 current_space = free_space; 2404 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2405 current_space_lvl = free_space_lvl; 2406 2407 for (k = 0; k < am; k++) { /* for each active row k */ 2408 /* initialize lnk by the column indices of row rip[k] of A */ 2409 nzk = 0; 2410 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2411 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2412 ncols_upper = 0; 2413 for (j = 0; j < ncols; j++) { 2414 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2415 if (riip[i] >= k) { /* only take upper triangular entry */ 2416 ajtmp[ncols_upper] = i; 2417 ncols_upper++; 2418 } 2419 } 2420 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2421 nzk += nlnk; 2422 2423 /* update lnk by computing fill-in for each pivot row to be merged in */ 2424 prow = jl[k]; /* 1st pivot row */ 2425 2426 while (prow < k) { 2427 nextprow = jl[prow]; 2428 2429 /* merge prow into k-th row */ 2430 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2431 jmax = ui[prow + 1]; 2432 ncols = jmax - jmin; 2433 i = jmin - ui[prow]; 2434 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2435 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2436 j = *(uj - 1); 2437 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2438 nzk += nlnk; 2439 2440 /* update il and jl for prow */ 2441 if (jmin < jmax) { 2442 il[prow] = jmin; 2443 j = *cols; 2444 jl[prow] = jl[j]; 2445 jl[j] = prow; 2446 } 2447 prow = nextprow; 2448 } 2449 2450 /* if free space is not available, make more free space */ 2451 if (current_space->local_remaining < nzk) { 2452 i = am - k + 1; /* num of unfactored rows */ 2453 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2454 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2455 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2456 reallocs++; 2457 } 2458 2459 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2460 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2461 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2462 2463 /* add the k-th row into il and jl */ 2464 if (nzk > 1) { 2465 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2466 jl[k] = jl[i]; 2467 jl[i] = k; 2468 il[k] = ui[k] + 1; 2469 } 2470 uj_ptr[k] = current_space->array; 2471 uj_lvl_ptr[k] = current_space_lvl->array; 2472 2473 current_space->array += nzk; 2474 current_space->local_used += nzk; 2475 current_space->local_remaining -= nzk; 2476 2477 current_space_lvl->array += nzk; 2478 current_space_lvl->local_used += nzk; 2479 current_space_lvl->local_remaining -= nzk; 2480 2481 ui[k + 1] = ui[k] + nzk; 2482 } 2483 2484 PetscCall(ISRestoreIndices(perm, &rip)); 2485 PetscCall(ISRestoreIndices(iperm, &riip)); 2486 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2487 PetscCall(PetscFree(ajtmp)); 2488 2489 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2490 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2491 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2492 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2493 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2494 2495 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2496 2497 /* put together the new matrix in MATSEQSBAIJ format */ 2498 b = (Mat_SeqSBAIJ *)(fact)->data; 2499 b->singlemalloc = PETSC_FALSE; 2500 2501 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2502 2503 b->j = uj; 2504 b->i = ui; 2505 b->diag = udiag; 2506 b->free_diag = PETSC_TRUE; 2507 b->ilen = NULL; 2508 b->imax = NULL; 2509 b->row = perm; 2510 b->col = perm; 2511 PetscCall(PetscObjectReference((PetscObject)perm)); 2512 PetscCall(PetscObjectReference((PetscObject)perm)); 2513 b->icol = iperm; 2514 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2515 2516 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2517 2518 b->maxnz = b->nz = ui[am]; 2519 b->free_a = PETSC_TRUE; 2520 b->free_ij = PETSC_TRUE; 2521 2522 fact->info.factor_mallocs = reallocs; 2523 fact->info.fill_ratio_given = fill; 2524 if (ai[am] != 0) { 2525 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2526 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2527 } else { 2528 fact->info.fill_ratio_needed = 0.0; 2529 } 2530 #if defined(PETSC_USE_INFO) 2531 if (ai[am] != 0) { 2532 PetscReal af = fact->info.fill_ratio_needed; 2533 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2534 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2535 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2536 } else { 2537 PetscCall(PetscInfo(A, "Empty matrix\n")); 2538 } 2539 #endif 2540 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2541 PetscFunctionReturn(PETSC_SUCCESS); 2542 } 2543 2544 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2545 { 2546 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2547 Mat_SeqSBAIJ *b; 2548 PetscBool perm_identity, missing; 2549 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2550 const PetscInt *rip, *riip; 2551 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2552 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2553 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2554 PetscReal fill = info->fill, levels = info->levels; 2555 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2556 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2557 PetscBT lnkbt; 2558 IS iperm; 2559 2560 PetscFunctionBegin; 2561 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2562 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2563 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2564 PetscCall(ISIdentity(perm, &perm_identity)); 2565 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2566 2567 PetscCall(PetscMalloc1(am + 1, &ui)); 2568 PetscCall(PetscMalloc1(am + 1, &udiag)); 2569 ui[0] = 0; 2570 2571 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2572 if (!levels && perm_identity) { 2573 for (i = 0; i < am; i++) { 2574 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2575 udiag[i] = ui[i]; 2576 } 2577 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2578 cols = uj; 2579 for (i = 0; i < am; i++) { 2580 aj = a->j + a->diag[i]; 2581 ncols = ui[i + 1] - ui[i]; 2582 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2583 } 2584 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2585 PetscCall(ISGetIndices(iperm, &riip)); 2586 PetscCall(ISGetIndices(perm, &rip)); 2587 2588 /* initialization */ 2589 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2590 2591 /* jl: linked list for storing indices of the pivot rows 2592 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2593 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2594 for (i = 0; i < am; i++) { 2595 jl[i] = am; 2596 il[i] = 0; 2597 } 2598 2599 /* create and initialize a linked list for storing column indices of the active row k */ 2600 nlnk = am + 1; 2601 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2602 2603 /* initial FreeSpace size is fill*(ai[am]+1) */ 2604 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2605 current_space = free_space; 2606 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2607 current_space_lvl = free_space_lvl; 2608 2609 for (k = 0; k < am; k++) { /* for each active row k */ 2610 /* initialize lnk by the column indices of row rip[k] of A */ 2611 nzk = 0; 2612 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2613 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2614 ncols_upper = 0; 2615 for (j = 0; j < ncols; j++) { 2616 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2617 if (riip[i] >= k) { /* only take upper triangular entry */ 2618 ajtmp[ncols_upper] = i; 2619 ncols_upper++; 2620 } 2621 } 2622 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2623 nzk += nlnk; 2624 2625 /* update lnk by computing fill-in for each pivot row to be merged in */ 2626 prow = jl[k]; /* 1st pivot row */ 2627 2628 while (prow < k) { 2629 nextprow = jl[prow]; 2630 2631 /* merge prow into k-th row */ 2632 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2633 jmax = ui[prow + 1]; 2634 ncols = jmax - jmin; 2635 i = jmin - ui[prow]; 2636 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2637 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2638 j = *(uj - 1); 2639 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2640 nzk += nlnk; 2641 2642 /* update il and jl for prow */ 2643 if (jmin < jmax) { 2644 il[prow] = jmin; 2645 j = *cols; 2646 jl[prow] = jl[j]; 2647 jl[j] = prow; 2648 } 2649 prow = nextprow; 2650 } 2651 2652 /* if free space is not available, make more free space */ 2653 if (current_space->local_remaining < nzk) { 2654 i = am - k + 1; /* num of unfactored rows */ 2655 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2656 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2657 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2658 reallocs++; 2659 } 2660 2661 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2662 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2663 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2664 2665 /* add the k-th row into il and jl */ 2666 if (nzk > 1) { 2667 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2668 jl[k] = jl[i]; 2669 jl[i] = k; 2670 il[k] = ui[k] + 1; 2671 } 2672 uj_ptr[k] = current_space->array; 2673 uj_lvl_ptr[k] = current_space_lvl->array; 2674 2675 current_space->array += nzk; 2676 current_space->local_used += nzk; 2677 current_space->local_remaining -= nzk; 2678 2679 current_space_lvl->array += nzk; 2680 current_space_lvl->local_used += nzk; 2681 current_space_lvl->local_remaining -= nzk; 2682 2683 ui[k + 1] = ui[k] + nzk; 2684 } 2685 2686 #if defined(PETSC_USE_INFO) 2687 if (ai[am] != 0) { 2688 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2689 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2690 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2691 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2692 } else { 2693 PetscCall(PetscInfo(A, "Empty matrix\n")); 2694 } 2695 #endif 2696 2697 PetscCall(ISRestoreIndices(perm, &rip)); 2698 PetscCall(ISRestoreIndices(iperm, &riip)); 2699 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2700 PetscCall(PetscFree(ajtmp)); 2701 2702 /* destroy list of free space and other temporary array(s) */ 2703 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2704 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2705 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2706 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2707 2708 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2709 2710 /* put together the new matrix in MATSEQSBAIJ format */ 2711 2712 b = (Mat_SeqSBAIJ *)fact->data; 2713 b->singlemalloc = PETSC_FALSE; 2714 2715 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2716 2717 b->j = uj; 2718 b->i = ui; 2719 b->diag = udiag; 2720 b->free_diag = PETSC_TRUE; 2721 b->ilen = NULL; 2722 b->imax = NULL; 2723 b->row = perm; 2724 b->col = perm; 2725 2726 PetscCall(PetscObjectReference((PetscObject)perm)); 2727 PetscCall(PetscObjectReference((PetscObject)perm)); 2728 2729 b->icol = iperm; 2730 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2731 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2732 b->maxnz = b->nz = ui[am]; 2733 b->free_a = PETSC_TRUE; 2734 b->free_ij = PETSC_TRUE; 2735 2736 fact->info.factor_mallocs = reallocs; 2737 fact->info.fill_ratio_given = fill; 2738 if (ai[am] != 0) { 2739 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2740 } else { 2741 fact->info.fill_ratio_needed = 0.0; 2742 } 2743 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2744 PetscFunctionReturn(PETSC_SUCCESS); 2745 } 2746 2747 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2748 { 2749 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2750 Mat_SeqSBAIJ *b; 2751 PetscBool perm_identity, missing; 2752 PetscReal fill = info->fill; 2753 const PetscInt *rip, *riip; 2754 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2755 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2756 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2757 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2758 PetscBT lnkbt; 2759 IS iperm; 2760 2761 PetscFunctionBegin; 2762 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2763 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2764 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2765 2766 /* check whether perm is the identity mapping */ 2767 PetscCall(ISIdentity(perm, &perm_identity)); 2768 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2769 PetscCall(ISGetIndices(iperm, &riip)); 2770 PetscCall(ISGetIndices(perm, &rip)); 2771 2772 /* initialization */ 2773 PetscCall(PetscMalloc1(am + 1, &ui)); 2774 PetscCall(PetscMalloc1(am + 1, &udiag)); 2775 ui[0] = 0; 2776 2777 /* jl: linked list for storing indices of the pivot rows 2778 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2779 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2780 for (i = 0; i < am; i++) { 2781 jl[i] = am; 2782 il[i] = 0; 2783 } 2784 2785 /* create and initialize a linked list for storing column indices of the active row k */ 2786 nlnk = am + 1; 2787 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2788 2789 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2790 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2791 current_space = free_space; 2792 2793 for (k = 0; k < am; k++) { /* for each active row k */ 2794 /* initialize lnk by the column indices of row rip[k] of A */ 2795 nzk = 0; 2796 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2797 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2798 ncols_upper = 0; 2799 for (j = 0; j < ncols; j++) { 2800 i = riip[*(aj + ai[rip[k]] + j)]; 2801 if (i >= k) { /* only take upper triangular entry */ 2802 cols[ncols_upper] = i; 2803 ncols_upper++; 2804 } 2805 } 2806 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2807 nzk += nlnk; 2808 2809 /* update lnk by computing fill-in for each pivot row to be merged in */ 2810 prow = jl[k]; /* 1st pivot row */ 2811 2812 while (prow < k) { 2813 nextprow = jl[prow]; 2814 /* merge prow into k-th row */ 2815 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2816 jmax = ui[prow + 1]; 2817 ncols = jmax - jmin; 2818 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2819 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2820 nzk += nlnk; 2821 2822 /* update il and jl for prow */ 2823 if (jmin < jmax) { 2824 il[prow] = jmin; 2825 j = *uj_ptr; 2826 jl[prow] = jl[j]; 2827 jl[j] = prow; 2828 } 2829 prow = nextprow; 2830 } 2831 2832 /* if free space is not available, make more free space */ 2833 if (current_space->local_remaining < nzk) { 2834 i = am - k + 1; /* num of unfactored rows */ 2835 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2836 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2837 reallocs++; 2838 } 2839 2840 /* copy data into free space, then initialize lnk */ 2841 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2842 2843 /* add the k-th row into il and jl */ 2844 if (nzk > 1) { 2845 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2846 jl[k] = jl[i]; 2847 jl[i] = k; 2848 il[k] = ui[k] + 1; 2849 } 2850 ui_ptr[k] = current_space->array; 2851 2852 current_space->array += nzk; 2853 current_space->local_used += nzk; 2854 current_space->local_remaining -= nzk; 2855 2856 ui[k + 1] = ui[k] + nzk; 2857 } 2858 2859 PetscCall(ISRestoreIndices(perm, &rip)); 2860 PetscCall(ISRestoreIndices(iperm, &riip)); 2861 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2862 2863 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2864 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2865 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2866 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2867 2868 /* put together the new matrix in MATSEQSBAIJ format */ 2869 2870 b = (Mat_SeqSBAIJ *)fact->data; 2871 b->singlemalloc = PETSC_FALSE; 2872 b->free_a = PETSC_TRUE; 2873 b->free_ij = PETSC_TRUE; 2874 2875 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2876 2877 b->j = uj; 2878 b->i = ui; 2879 b->diag = udiag; 2880 b->free_diag = PETSC_TRUE; 2881 b->ilen = NULL; 2882 b->imax = NULL; 2883 b->row = perm; 2884 b->col = perm; 2885 2886 PetscCall(PetscObjectReference((PetscObject)perm)); 2887 PetscCall(PetscObjectReference((PetscObject)perm)); 2888 2889 b->icol = iperm; 2890 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2891 2892 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2893 2894 b->maxnz = b->nz = ui[am]; 2895 2896 fact->info.factor_mallocs = reallocs; 2897 fact->info.fill_ratio_given = fill; 2898 if (ai[am] != 0) { 2899 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2900 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2901 } else { 2902 fact->info.fill_ratio_needed = 0.0; 2903 } 2904 #if defined(PETSC_USE_INFO) 2905 if (ai[am] != 0) { 2906 PetscReal af = fact->info.fill_ratio_needed; 2907 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2908 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2909 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2910 } else { 2911 PetscCall(PetscInfo(A, "Empty matrix\n")); 2912 } 2913 #endif 2914 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2915 PetscFunctionReturn(PETSC_SUCCESS); 2916 } 2917 2918 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2919 { 2920 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2921 Mat_SeqSBAIJ *b; 2922 PetscBool perm_identity, missing; 2923 PetscReal fill = info->fill; 2924 const PetscInt *rip, *riip; 2925 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2926 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2927 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2928 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2929 PetscBT lnkbt; 2930 IS iperm; 2931 2932 PetscFunctionBegin; 2933 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2934 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2935 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2936 2937 /* check whether perm is the identity mapping */ 2938 PetscCall(ISIdentity(perm, &perm_identity)); 2939 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2940 PetscCall(ISGetIndices(iperm, &riip)); 2941 PetscCall(ISGetIndices(perm, &rip)); 2942 2943 /* initialization */ 2944 PetscCall(PetscMalloc1(am + 1, &ui)); 2945 ui[0] = 0; 2946 2947 /* jl: linked list for storing indices of the pivot rows 2948 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2949 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2950 for (i = 0; i < am; i++) { 2951 jl[i] = am; 2952 il[i] = 0; 2953 } 2954 2955 /* create and initialize a linked list for storing column indices of the active row k */ 2956 nlnk = am + 1; 2957 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2958 2959 /* initial FreeSpace size is fill*(ai[am]+1) */ 2960 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2961 current_space = free_space; 2962 2963 for (k = 0; k < am; k++) { /* for each active row k */ 2964 /* initialize lnk by the column indices of row rip[k] of A */ 2965 nzk = 0; 2966 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2967 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2968 ncols_upper = 0; 2969 for (j = 0; j < ncols; j++) { 2970 i = riip[*(aj + ai[rip[k]] + j)]; 2971 if (i >= k) { /* only take upper triangular entry */ 2972 cols[ncols_upper] = i; 2973 ncols_upper++; 2974 } 2975 } 2976 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2977 nzk += nlnk; 2978 2979 /* update lnk by computing fill-in for each pivot row to be merged in */ 2980 prow = jl[k]; /* 1st pivot row */ 2981 2982 while (prow < k) { 2983 nextprow = jl[prow]; 2984 /* merge prow into k-th row */ 2985 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2986 jmax = ui[prow + 1]; 2987 ncols = jmax - jmin; 2988 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2989 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2990 nzk += nlnk; 2991 2992 /* update il and jl for prow */ 2993 if (jmin < jmax) { 2994 il[prow] = jmin; 2995 j = *uj_ptr; 2996 jl[prow] = jl[j]; 2997 jl[j] = prow; 2998 } 2999 prow = nextprow; 3000 } 3001 3002 /* if free space is not available, make more free space */ 3003 if (current_space->local_remaining < nzk) { 3004 i = am - k + 1; /* num of unfactored rows */ 3005 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3006 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3007 reallocs++; 3008 } 3009 3010 /* copy data into free space, then initialize lnk */ 3011 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3012 3013 /* add the k-th row into il and jl */ 3014 if (nzk - 1 > 0) { 3015 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3016 jl[k] = jl[i]; 3017 jl[i] = k; 3018 il[k] = ui[k] + 1; 3019 } 3020 ui_ptr[k] = current_space->array; 3021 3022 current_space->array += nzk; 3023 current_space->local_used += nzk; 3024 current_space->local_remaining -= nzk; 3025 3026 ui[k + 1] = ui[k] + nzk; 3027 } 3028 3029 #if defined(PETSC_USE_INFO) 3030 if (ai[am] != 0) { 3031 PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 3032 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3033 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3034 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3035 } else { 3036 PetscCall(PetscInfo(A, "Empty matrix\n")); 3037 } 3038 #endif 3039 3040 PetscCall(ISRestoreIndices(perm, &rip)); 3041 PetscCall(ISRestoreIndices(iperm, &riip)); 3042 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3043 3044 /* destroy list of free space and other temporary array(s) */ 3045 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 3046 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3047 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3048 3049 /* put together the new matrix in MATSEQSBAIJ format */ 3050 3051 b = (Mat_SeqSBAIJ *)fact->data; 3052 b->singlemalloc = PETSC_FALSE; 3053 b->free_a = PETSC_TRUE; 3054 b->free_ij = PETSC_TRUE; 3055 3056 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 3057 3058 b->j = uj; 3059 b->i = ui; 3060 b->diag = NULL; 3061 b->ilen = NULL; 3062 b->imax = NULL; 3063 b->row = perm; 3064 b->col = perm; 3065 3066 PetscCall(PetscObjectReference((PetscObject)perm)); 3067 PetscCall(PetscObjectReference((PetscObject)perm)); 3068 3069 b->icol = iperm; 3070 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3071 3072 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3073 b->maxnz = b->nz = ui[am]; 3074 3075 fact->info.factor_mallocs = reallocs; 3076 fact->info.fill_ratio_given = fill; 3077 if (ai[am] != 0) { 3078 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 3079 } else { 3080 fact->info.fill_ratio_needed = 0.0; 3081 } 3082 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3083 PetscFunctionReturn(PETSC_SUCCESS); 3084 } 3085 3086 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3087 { 3088 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3089 PetscInt n = A->rmap->n; 3090 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3091 PetscScalar *x, sum; 3092 const PetscScalar *b; 3093 const MatScalar *aa = a->a, *v; 3094 PetscInt i, nz; 3095 3096 PetscFunctionBegin; 3097 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3098 3099 PetscCall(VecGetArrayRead(bb, &b)); 3100 PetscCall(VecGetArrayWrite(xx, &x)); 3101 3102 /* forward solve the lower triangular */ 3103 x[0] = b[0]; 3104 v = aa; 3105 vi = aj; 3106 for (i = 1; i < n; i++) { 3107 nz = ai[i + 1] - ai[i]; 3108 sum = b[i]; 3109 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3110 v += nz; 3111 vi += nz; 3112 x[i] = sum; 3113 } 3114 3115 /* backward solve the upper triangular */ 3116 for (i = n - 1; i >= 0; i--) { 3117 v = aa + adiag[i + 1] + 1; 3118 vi = aj + adiag[i + 1] + 1; 3119 nz = adiag[i] - adiag[i + 1] - 1; 3120 sum = x[i]; 3121 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3122 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3123 } 3124 3125 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3126 PetscCall(VecRestoreArrayRead(bb, &b)); 3127 PetscCall(VecRestoreArrayWrite(xx, &x)); 3128 PetscFunctionReturn(PETSC_SUCCESS); 3129 } 3130 3131 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3132 { 3133 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3134 IS iscol = a->col, isrow = a->row; 3135 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3136 const PetscInt *rout, *cout, *r, *c; 3137 PetscScalar *x, *tmp, sum; 3138 const PetscScalar *b; 3139 const MatScalar *aa = a->a, *v; 3140 3141 PetscFunctionBegin; 3142 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3143 3144 PetscCall(VecGetArrayRead(bb, &b)); 3145 PetscCall(VecGetArrayWrite(xx, &x)); 3146 tmp = a->solve_work; 3147 3148 PetscCall(ISGetIndices(isrow, &rout)); 3149 r = rout; 3150 PetscCall(ISGetIndices(iscol, &cout)); 3151 c = cout; 3152 3153 /* forward solve the lower triangular */ 3154 tmp[0] = b[r[0]]; 3155 v = aa; 3156 vi = aj; 3157 for (i = 1; i < n; i++) { 3158 nz = ai[i + 1] - ai[i]; 3159 sum = b[r[i]]; 3160 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3161 tmp[i] = sum; 3162 v += nz; 3163 vi += nz; 3164 } 3165 3166 /* backward solve the upper triangular */ 3167 for (i = n - 1; i >= 0; i--) { 3168 v = aa + adiag[i + 1] + 1; 3169 vi = aj + adiag[i + 1] + 1; 3170 nz = adiag[i] - adiag[i + 1] - 1; 3171 sum = tmp[i]; 3172 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3173 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3174 } 3175 3176 PetscCall(ISRestoreIndices(isrow, &rout)); 3177 PetscCall(ISRestoreIndices(iscol, &cout)); 3178 PetscCall(VecRestoreArrayRead(bb, &b)); 3179 PetscCall(VecRestoreArrayWrite(xx, &x)); 3180 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3181 PetscFunctionReturn(PETSC_SUCCESS); 3182 } 3183 3184 /* 3185 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3186 */ 3187 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3188 { 3189 Mat B = *fact; 3190 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3191 IS isicol; 3192 const PetscInt *r, *ic; 3193 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3194 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3195 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3196 PetscInt nlnk, *lnk; 3197 PetscBT lnkbt; 3198 PetscBool row_identity, icol_identity; 3199 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3200 const PetscInt *ics; 3201 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3202 PetscReal dt = info->dt, shift = info->shiftamount; 3203 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3204 PetscBool missing; 3205 3206 PetscFunctionBegin; 3207 if (dt == PETSC_DEFAULT) dt = 0.005; 3208 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3209 3210 /* ------- symbolic factorization, can be reused ---------*/ 3211 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3212 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3213 adiag = a->diag; 3214 3215 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3216 3217 /* bdiag is location of diagonal in factor */ 3218 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3219 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3220 3221 /* allocate row pointers bi */ 3222 PetscCall(PetscMalloc1(2 * n + 2, &bi)); 3223 3224 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3225 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3226 nnz_max = ai[n] + 2 * n * dtcount + 2; 3227 3228 PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 3229 PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3230 3231 /* put together the new matrix */ 3232 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3233 b = (Mat_SeqAIJ *)B->data; 3234 3235 b->free_a = PETSC_TRUE; 3236 b->free_ij = PETSC_TRUE; 3237 b->singlemalloc = PETSC_FALSE; 3238 3239 b->a = ba; 3240 b->j = bj; 3241 b->i = bi; 3242 b->diag = bdiag; 3243 b->ilen = NULL; 3244 b->imax = NULL; 3245 b->row = isrow; 3246 b->col = iscol; 3247 PetscCall(PetscObjectReference((PetscObject)isrow)); 3248 PetscCall(PetscObjectReference((PetscObject)iscol)); 3249 b->icol = isicol; 3250 3251 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3252 b->maxnz = nnz_max; 3253 3254 B->factortype = MAT_FACTOR_ILUDT; 3255 B->info.factor_mallocs = 0; 3256 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3257 /* ------- end of symbolic factorization ---------*/ 3258 3259 PetscCall(ISGetIndices(isrow, &r)); 3260 PetscCall(ISGetIndices(isicol, &ic)); 3261 ics = ic; 3262 3263 /* linked list for storing column indices of the active row */ 3264 nlnk = n + 1; 3265 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3266 3267 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3268 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3269 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3270 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3271 PetscCall(PetscArrayzero(rtmp, n)); 3272 3273 bi[0] = 0; 3274 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3275 bdiag_rev[n] = bdiag[0]; 3276 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3277 for (i = 0; i < n; i++) { 3278 /* copy initial fill into linked list */ 3279 nzi = ai[r[i] + 1] - ai[r[i]]; 3280 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3281 nzi_al = adiag[r[i]] - ai[r[i]]; 3282 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3283 ajtmp = aj + ai[r[i]]; 3284 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3285 3286 /* load in initial (unfactored row) */ 3287 aatmp = a->a + ai[r[i]]; 3288 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3289 3290 /* add pivot rows into linked list */ 3291 row = lnk[n]; 3292 while (row < i) { 3293 nzi_bl = bi[row + 1] - bi[row] + 1; 3294 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3295 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3296 nzi += nlnk; 3297 row = lnk[row]; 3298 } 3299 3300 /* copy data from lnk into jtmp, then initialize lnk */ 3301 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3302 3303 /* numerical factorization */ 3304 bjtmp = jtmp; 3305 row = *bjtmp++; /* 1st pivot row */ 3306 while (row < i) { 3307 pc = rtmp + row; 3308 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3309 multiplier = (*pc) * (*pv); 3310 *pc = multiplier; 3311 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3312 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3313 pv = ba + bdiag[row + 1] + 1; 3314 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3315 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3316 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3317 } 3318 row = *bjtmp++; 3319 } 3320 3321 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3322 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3323 nzi_bl = 0; 3324 j = 0; 3325 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3326 vtmp[j] = rtmp[jtmp[j]]; 3327 rtmp[jtmp[j]] = 0.0; 3328 nzi_bl++; 3329 j++; 3330 } 3331 nzi_bu = nzi - nzi_bl - 1; 3332 while (j < nzi) { 3333 vtmp[j] = rtmp[jtmp[j]]; 3334 rtmp[jtmp[j]] = 0.0; 3335 j++; 3336 } 3337 3338 bjtmp = bj + bi[i]; 3339 batmp = ba + bi[i]; 3340 /* apply level dropping rule to L part */ 3341 ncut = nzi_al + dtcount; 3342 if (ncut < nzi_bl) { 3343 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3344 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3345 } else { 3346 ncut = nzi_bl; 3347 } 3348 for (j = 0; j < ncut; j++) { 3349 bjtmp[j] = jtmp[j]; 3350 batmp[j] = vtmp[j]; 3351 } 3352 bi[i + 1] = bi[i] + ncut; 3353 nzi = ncut + 1; 3354 3355 /* apply level dropping rule to U part */ 3356 ncut = nzi_au + dtcount; 3357 if (ncut < nzi_bu) { 3358 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3359 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3360 } else { 3361 ncut = nzi_bu; 3362 } 3363 nzi += ncut; 3364 3365 /* mark bdiagonal */ 3366 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3367 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3368 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3369 bjtmp = bj + bdiag[i]; 3370 batmp = ba + bdiag[i]; 3371 *bjtmp = i; 3372 *batmp = diag_tmp; /* rtmp[i]; */ 3373 if (*batmp == 0.0) *batmp = dt + shift; 3374 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3375 3376 bjtmp = bj + bdiag[i + 1] + 1; 3377 batmp = ba + bdiag[i + 1] + 1; 3378 for (k = 0; k < ncut; k++) { 3379 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3380 batmp[k] = vtmp[nzi_bl + 1 + k]; 3381 } 3382 3383 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3384 } /* for (i=0; i<n; i++) */ 3385 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3386 3387 PetscCall(ISRestoreIndices(isrow, &r)); 3388 PetscCall(ISRestoreIndices(isicol, &ic)); 3389 3390 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3391 PetscCall(PetscFree2(im, jtmp)); 3392 PetscCall(PetscFree2(rtmp, vtmp)); 3393 PetscCall(PetscFree(bdiag_rev)); 3394 3395 PetscCall(PetscLogFlops(B->cmap->n)); 3396 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3397 3398 PetscCall(ISIdentity(isrow, &row_identity)); 3399 PetscCall(ISIdentity(isicol, &icol_identity)); 3400 if (row_identity && icol_identity) { 3401 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3402 } else { 3403 B->ops->solve = MatSolve_SeqAIJ; 3404 } 3405 3406 B->ops->solveadd = NULL; 3407 B->ops->solvetranspose = NULL; 3408 B->ops->solvetransposeadd = NULL; 3409 B->ops->matsolve = NULL; 3410 B->assembled = PETSC_TRUE; 3411 B->preallocated = PETSC_TRUE; 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } 3414 3415 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3416 /* 3417 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3418 */ 3419 3420 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3421 { 3422 PetscFunctionBegin; 3423 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 3427 /* 3428 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3429 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3430 */ 3431 /* 3432 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3433 */ 3434 3435 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3436 { 3437 Mat C = fact; 3438 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3439 IS isrow = b->row, isicol = b->icol; 3440 const PetscInt *r, *ic, *ics; 3441 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3442 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3443 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3444 PetscReal dt = info->dt, shift = info->shiftamount; 3445 PetscBool row_identity, col_identity; 3446 3447 PetscFunctionBegin; 3448 PetscCall(ISGetIndices(isrow, &r)); 3449 PetscCall(ISGetIndices(isicol, &ic)); 3450 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3451 ics = ic; 3452 3453 for (i = 0; i < n; i++) { 3454 /* initialize rtmp array */ 3455 nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 3456 bjtmp = bj + bi[i]; 3457 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3458 rtmp[i] = 0.0; 3459 nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3460 bjtmp = bj + bdiag[i + 1] + 1; 3461 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3462 3463 /* load in initial unfactored row of A */ 3464 nz = ai[r[i] + 1] - ai[r[i]]; 3465 ajtmp = aj + ai[r[i]]; 3466 v = aa + ai[r[i]]; 3467 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3468 3469 /* numerical factorization */ 3470 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3471 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3472 k = 0; 3473 while (k < nzl) { 3474 row = *bjtmp++; 3475 pc = rtmp + row; 3476 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3477 multiplier = (*pc) * (*pv); 3478 *pc = multiplier; 3479 if (PetscAbsScalar(multiplier) > dt) { 3480 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3481 pv = b->a + bdiag[row + 1] + 1; 3482 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3483 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3484 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3485 } 3486 k++; 3487 } 3488 3489 /* finished row so stick it into b->a */ 3490 /* L-part */ 3491 pv = b->a + bi[i]; 3492 pj = bj + bi[i]; 3493 nzl = bi[i + 1] - bi[i]; 3494 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3495 3496 /* diagonal: invert diagonal entries for simpler triangular solves */ 3497 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3498 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3499 3500 /* U-part */ 3501 pv = b->a + bdiag[i + 1] + 1; 3502 pj = bj + bdiag[i + 1] + 1; 3503 nzu = bdiag[i] - bdiag[i + 1] - 1; 3504 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3505 } 3506 3507 PetscCall(PetscFree(rtmp)); 3508 PetscCall(ISRestoreIndices(isicol, &ic)); 3509 PetscCall(ISRestoreIndices(isrow, &r)); 3510 3511 PetscCall(ISIdentity(isrow, &row_identity)); 3512 PetscCall(ISIdentity(isicol, &col_identity)); 3513 if (row_identity && col_identity) { 3514 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3515 } else { 3516 C->ops->solve = MatSolve_SeqAIJ; 3517 } 3518 C->ops->solveadd = NULL; 3519 C->ops->solvetranspose = NULL; 3520 C->ops->solvetransposeadd = NULL; 3521 C->ops->matsolve = NULL; 3522 C->assembled = PETSC_TRUE; 3523 C->preallocated = PETSC_TRUE; 3524 3525 PetscCall(PetscLogFlops(C->cmap->n)); 3526 PetscFunctionReturn(PETSC_SUCCESS); 3527 } 3528