1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 /* 7 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8 9 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 10 */ 11 #if 0 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best, past = 0, future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i = 0; i < n; i++) { 25 future = 0.0; 26 for (j = ai[i]; j < ai[i + 1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past / future > best) { 32 best = past / future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n, &done)); 38 PetscCall(PetscArrayzero(done, n)); 39 PetscCall(PetscMalloc1(n, &order)); 40 order[0] = current; 41 for (i = 0; i < n - 1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j = ai[current]; j < ai[current + 1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k = ai[jj]; k < ai[jj + 1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past / future > best) { 57 best = past / future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k = 0; k < n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j = ai[k]; j < ai[k + 1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past / future > best) { 74 best = past / future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 80 current = newcurrent; 81 order[i + 1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 #endif 91 92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93 { 94 PetscFunctionBegin; 95 *type = MATSOLVERPETSC; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100 { 101 PetscInt n = A->rmap->n; 102 103 PetscFunctionBegin; 104 #if defined(PETSC_USE_COMPLEX) 105 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported"); 106 #endif 107 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 108 PetscCall(MatSetSizes(*B, n, n, n, n)); 109 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 110 PetscCall(MatSetType(*B, MATSEQAIJ)); 111 112 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 113 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 114 115 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 116 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 117 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 118 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 119 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 120 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 121 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 122 123 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 124 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 125 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 126 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 127 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 128 (*B)->factortype = ftype; 129 130 PetscCall(PetscFree((*B)->solvertype)); 131 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 132 (*B)->canuseordering = PETSC_TRUE; 133 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 #if 0 138 // currently unused 139 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 140 { 141 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 142 IS isicol; 143 const PetscInt *r, *ic; 144 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 145 PetscInt *bi, *bj, *ajtmp; 146 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 147 PetscReal f; 148 PetscInt nlnk, *lnk, k, **bi_ptr; 149 PetscFreeSpaceList free_space = NULL, current_space = NULL; 150 PetscBT lnkbt; 151 PetscBool missing; 152 153 PetscFunctionBegin; 154 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 155 PetscCall(MatMissingDiagonal(A, &missing, &i)); 156 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 157 158 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 159 PetscCall(ISGetIndices(isrow, &r)); 160 PetscCall(ISGetIndices(isicol, &ic)); 161 162 /* get new row pointers */ 163 PetscCall(PetscMalloc1(n + 1, &bi)); 164 bi[0] = 0; 165 166 /* bdiag is location of diagonal in factor */ 167 PetscCall(PetscMalloc1(n + 1, &bdiag)); 168 bdiag[0] = 0; 169 170 /* linked list for storing column indices of the active row */ 171 nlnk = n + 1; 172 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 173 174 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 175 176 /* initial FreeSpace size is f*(ai[n]+1) */ 177 f = info->fill; 178 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 179 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 180 current_space = free_space; 181 182 for (i = 0; i < n; i++) { 183 /* copy previous fill into linked list */ 184 nzi = 0; 185 nnz = ai[r[i] + 1] - ai[r[i]]; 186 ajtmp = aj + ai[r[i]]; 187 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 188 nzi += nlnk; 189 190 /* add pivot rows into linked list */ 191 row = lnk[n]; 192 while (row < i) { 193 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 194 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 195 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 196 nzi += nlnk; 197 row = lnk[row]; 198 } 199 bi[i + 1] = bi[i] + nzi; 200 im[i] = nzi; 201 202 /* mark bdiag */ 203 nzbd = 0; 204 nnz = nzi; 205 k = lnk[n]; 206 while (nnz-- && k < i) { 207 nzbd++; 208 k = lnk[k]; 209 } 210 bdiag[i] = bi[i] + nzbd; 211 212 /* if free space is not available, make more free space */ 213 if (current_space->local_remaining < nzi) { 214 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 215 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 216 reallocs++; 217 } 218 219 /* copy data into free space, then initialize lnk */ 220 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 221 222 bi_ptr[i] = current_space->array; 223 current_space->array += nzi; 224 current_space->local_used += nzi; 225 current_space->local_remaining -= nzi; 226 } 227 #if defined(PETSC_USE_INFO) 228 if (ai[n] != 0) { 229 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 230 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 231 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 232 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 233 PetscCall(PetscInfo(A, "for best performance.\n")); 234 } else { 235 PetscCall(PetscInfo(A, "Empty matrix\n")); 236 } 237 #endif 238 239 PetscCall(ISRestoreIndices(isrow, &r)); 240 PetscCall(ISRestoreIndices(isicol, &ic)); 241 242 /* destroy list of free space and other temporary array(s) */ 243 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 244 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 245 PetscCall(PetscLLDestroy(lnk, lnkbt)); 246 PetscCall(PetscFree2(bi_ptr, im)); 247 248 /* put together the new matrix */ 249 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 250 b = (Mat_SeqAIJ *)(B)->data; 251 252 b->free_a = PETSC_TRUE; 253 b->free_ij = PETSC_TRUE; 254 b->singlemalloc = PETSC_FALSE; 255 256 PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 257 b->j = bj; 258 b->i = bi; 259 b->diag = bdiag; 260 b->ilen = NULL; 261 b->imax = NULL; 262 b->row = isrow; 263 b->col = iscol; 264 PetscCall(PetscObjectReference((PetscObject)isrow)); 265 PetscCall(PetscObjectReference((PetscObject)iscol)); 266 b->icol = isicol; 267 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 268 269 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 270 b->maxnz = b->nz = bi[n]; 271 272 (B)->factortype = MAT_FACTOR_LU; 273 (B)->info.factor_mallocs = reallocs; 274 (B)->info.fill_ratio_given = f; 275 276 if (ai[n]) { 277 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 278 } else { 279 (B)->info.fill_ratio_needed = 0.0; 280 } 281 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 282 if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 283 PetscFunctionReturn(PETSC_SUCCESS); 284 } 285 #endif 286 287 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 288 { 289 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 290 IS isicol; 291 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 292 PetscInt i, n = A->rmap->n; 293 PetscInt *bi, *bj; 294 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 295 PetscReal f; 296 PetscInt nlnk, *lnk, k, **bi_ptr; 297 PetscFreeSpaceList free_space = NULL, current_space = NULL; 298 PetscBT lnkbt; 299 PetscBool missing; 300 301 PetscFunctionBegin; 302 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 303 PetscCall(MatMissingDiagonal(A, &missing, &i)); 304 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 305 306 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 307 PetscCall(ISGetIndices(isrow, &r)); 308 PetscCall(ISGetIndices(isicol, &ic)); 309 310 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 311 PetscCall(PetscMalloc1(n + 1, &bi)); 312 PetscCall(PetscMalloc1(n + 1, &bdiag)); 313 bi[0] = bdiag[0] = 0; 314 315 /* linked list for storing column indices of the active row */ 316 nlnk = n + 1; 317 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 318 319 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 320 321 /* initial FreeSpace size is f*(ai[n]+1) */ 322 f = info->fill; 323 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 324 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 325 current_space = free_space; 326 327 for (i = 0; i < n; i++) { 328 /* copy previous fill into linked list */ 329 nzi = 0; 330 nnz = ai[r[i] + 1] - ai[r[i]]; 331 ajtmp = aj + ai[r[i]]; 332 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 333 nzi += nlnk; 334 335 /* add pivot rows into linked list */ 336 row = lnk[n]; 337 while (row < i) { 338 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 339 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 340 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 341 nzi += nlnk; 342 row = lnk[row]; 343 } 344 bi[i + 1] = bi[i] + nzi; 345 im[i] = nzi; 346 347 /* mark bdiag */ 348 nzbd = 0; 349 nnz = nzi; 350 k = lnk[n]; 351 while (nnz-- && k < i) { 352 nzbd++; 353 k = lnk[k]; 354 } 355 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 356 357 /* if free space is not available, make more free space */ 358 if (current_space->local_remaining < nzi) { 359 /* estimated additional space needed */ 360 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 361 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 362 reallocs++; 363 } 364 365 /* copy data into free space, then initialize lnk */ 366 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 367 368 bi_ptr[i] = current_space->array; 369 current_space->array += nzi; 370 current_space->local_used += nzi; 371 current_space->local_remaining -= nzi; 372 } 373 374 PetscCall(ISRestoreIndices(isrow, &r)); 375 PetscCall(ISRestoreIndices(isicol, &ic)); 376 377 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 378 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 379 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 380 PetscCall(PetscLLDestroy(lnk, lnkbt)); 381 PetscCall(PetscFree2(bi_ptr, im)); 382 383 /* put together the new matrix */ 384 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 385 b = (Mat_SeqAIJ *)(B)->data; 386 387 b->free_a = PETSC_TRUE; 388 b->free_ij = PETSC_TRUE; 389 b->singlemalloc = PETSC_FALSE; 390 391 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 392 393 b->j = bj; 394 b->i = bi; 395 b->diag = bdiag; 396 b->ilen = NULL; 397 b->imax = NULL; 398 b->row = isrow; 399 b->col = iscol; 400 PetscCall(PetscObjectReference((PetscObject)isrow)); 401 PetscCall(PetscObjectReference((PetscObject)iscol)); 402 b->icol = isicol; 403 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 404 405 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 406 b->maxnz = b->nz = bdiag[0] + 1; 407 408 B->factortype = MAT_FACTOR_LU; 409 B->info.factor_mallocs = reallocs; 410 B->info.fill_ratio_given = f; 411 412 if (ai[n]) { 413 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 414 } else { 415 B->info.fill_ratio_needed = 0.0; 416 } 417 #if defined(PETSC_USE_INFO) 418 if (ai[n] != 0) { 419 PetscReal af = B->info.fill_ratio_needed; 420 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 421 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 422 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 423 PetscCall(PetscInfo(A, "for best performance.\n")); 424 } else { 425 PetscCall(PetscInfo(A, "Empty matrix\n")); 426 } 427 #endif 428 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 429 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 430 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 431 PetscFunctionReturn(PETSC_SUCCESS); 432 } 433 434 /* 435 Trouble in factorization, should we dump the original matrix? 436 */ 437 PetscErrorCode MatFactorDumpMatrix(Mat A) 438 { 439 PetscBool flg = PETSC_FALSE; 440 441 PetscFunctionBegin; 442 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 443 if (flg) { 444 PetscViewer viewer; 445 char filename[PETSC_MAX_PATH_LEN]; 446 447 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 448 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 449 PetscCall(MatView(A, viewer)); 450 PetscCall(PetscViewerDestroy(&viewer)); 451 } 452 PetscFunctionReturn(PETSC_SUCCESS); 453 } 454 455 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 456 { 457 Mat C = B; 458 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 459 IS isrow = b->row, isicol = b->icol; 460 const PetscInt *r, *ic, *ics; 461 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 462 PetscInt i, j, k, nz, nzL, row, *pj; 463 const PetscInt *ajtmp, *bjtmp; 464 MatScalar *rtmp, *pc, multiplier, *pv; 465 const MatScalar *aa = a->a, *v; 466 PetscBool row_identity, col_identity; 467 FactorShiftCtx sctx; 468 const PetscInt *ddiag; 469 PetscReal rs; 470 MatScalar d; 471 472 PetscFunctionBegin; 473 /* MatPivotSetUp(): initialize shift context sctx */ 474 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 475 476 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 477 ddiag = a->diag; 478 sctx.shift_top = info->zeropivot; 479 for (i = 0; i < n; i++) { 480 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 481 d = (aa)[ddiag[i]]; 482 rs = -PetscAbsScalar(d) - PetscRealPart(d); 483 v = aa + ai[i]; 484 nz = ai[i + 1] - ai[i]; 485 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 486 if (rs > sctx.shift_top) sctx.shift_top = rs; 487 } 488 sctx.shift_top *= 1.1; 489 sctx.nshift_max = 5; 490 sctx.shift_lo = 0.; 491 sctx.shift_hi = 1.; 492 } 493 494 PetscCall(ISGetIndices(isrow, &r)); 495 PetscCall(ISGetIndices(isicol, &ic)); 496 PetscCall(PetscMalloc1(n + 1, &rtmp)); 497 ics = ic; 498 499 do { 500 sctx.newshift = PETSC_FALSE; 501 for (i = 0; i < n; i++) { 502 /* zero rtmp */ 503 /* L part */ 504 nz = bi[i + 1] - bi[i]; 505 bjtmp = bj + bi[i]; 506 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 507 508 /* U part */ 509 nz = bdiag[i] - bdiag[i + 1]; 510 bjtmp = bj + bdiag[i + 1] + 1; 511 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 512 513 /* load in initial (unfactored row) */ 514 nz = ai[r[i] + 1] - ai[r[i]]; 515 ajtmp = aj + ai[r[i]]; 516 v = aa + ai[r[i]]; 517 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 518 /* ZeropivotApply() */ 519 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 520 521 /* elimination */ 522 bjtmp = bj + bi[i]; 523 row = *bjtmp++; 524 nzL = bi[i + 1] - bi[i]; 525 for (k = 0; k < nzL; k++) { 526 pc = rtmp + row; 527 if (*pc != 0.0) { 528 pv = b->a + bdiag[row]; 529 multiplier = *pc * (*pv); 530 *pc = multiplier; 531 532 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 533 pv = b->a + bdiag[row + 1] + 1; 534 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 535 536 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 537 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 538 } 539 row = *bjtmp++; 540 } 541 542 /* finished row so stick it into b->a */ 543 rs = 0.0; 544 /* L part */ 545 pv = b->a + bi[i]; 546 pj = b->j + bi[i]; 547 nz = bi[i + 1] - bi[i]; 548 for (j = 0; j < nz; j++) { 549 pv[j] = rtmp[pj[j]]; 550 rs += PetscAbsScalar(pv[j]); 551 } 552 553 /* U part */ 554 pv = b->a + bdiag[i + 1] + 1; 555 pj = b->j + bdiag[i + 1] + 1; 556 nz = bdiag[i] - bdiag[i + 1] - 1; 557 for (j = 0; j < nz; j++) { 558 pv[j] = rtmp[pj[j]]; 559 rs += PetscAbsScalar(pv[j]); 560 } 561 562 sctx.rs = rs; 563 sctx.pv = rtmp[i]; 564 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 565 if (sctx.newshift) break; /* break for-loop */ 566 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 567 568 /* Mark diagonal and invert diagonal for simpler triangular solves */ 569 pv = b->a + bdiag[i]; 570 *pv = 1.0 / rtmp[i]; 571 572 } /* endof for (i=0; i<n; i++) { */ 573 574 /* MatPivotRefine() */ 575 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 576 /* 577 * if no shift in this attempt & shifting & started shifting & can refine, 578 * then try lower shift 579 */ 580 sctx.shift_hi = sctx.shift_fraction; 581 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 582 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 583 sctx.newshift = PETSC_TRUE; 584 sctx.nshift++; 585 } 586 } while (sctx.newshift); 587 588 PetscCall(PetscFree(rtmp)); 589 PetscCall(ISRestoreIndices(isicol, &ic)); 590 PetscCall(ISRestoreIndices(isrow, &r)); 591 592 PetscCall(ISIdentity(isrow, &row_identity)); 593 PetscCall(ISIdentity(isicol, &col_identity)); 594 if (b->inode.size) { 595 C->ops->solve = MatSolve_SeqAIJ_Inode; 596 } else if (row_identity && col_identity) { 597 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 598 } else { 599 C->ops->solve = MatSolve_SeqAIJ; 600 } 601 C->ops->solveadd = MatSolveAdd_SeqAIJ; 602 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 603 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 604 C->ops->matsolve = MatMatSolve_SeqAIJ; 605 C->assembled = PETSC_TRUE; 606 C->preallocated = PETSC_TRUE; 607 608 PetscCall(PetscLogFlops(C->cmap->n)); 609 610 /* MatShiftView(A,info,&sctx) */ 611 if (sctx.nshift) { 612 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 613 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 614 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 615 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 616 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 617 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 618 } 619 } 620 PetscFunctionReturn(PETSC_SUCCESS); 621 } 622 623 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 624 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 625 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 626 627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 628 { 629 Mat C = B; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 631 IS isrow = b->row, isicol = b->icol; 632 const PetscInt *r, *ic, *ics; 633 PetscInt nz, row, i, j, n = A->rmap->n, diag; 634 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 635 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 636 MatScalar *pv, *rtmp, *pc, multiplier, d; 637 const MatScalar *v, *aa = a->a; 638 PetscReal rs = 0.0; 639 FactorShiftCtx sctx; 640 const PetscInt *ddiag; 641 PetscBool row_identity, col_identity; 642 643 PetscFunctionBegin; 644 /* MatPivotSetUp(): initialize shift context sctx */ 645 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 646 647 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 648 ddiag = a->diag; 649 sctx.shift_top = info->zeropivot; 650 for (i = 0; i < n; i++) { 651 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 652 d = (aa)[ddiag[i]]; 653 rs = -PetscAbsScalar(d) - PetscRealPart(d); 654 v = aa + ai[i]; 655 nz = ai[i + 1] - ai[i]; 656 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 657 if (rs > sctx.shift_top) sctx.shift_top = rs; 658 } 659 sctx.shift_top *= 1.1; 660 sctx.nshift_max = 5; 661 sctx.shift_lo = 0.; 662 sctx.shift_hi = 1.; 663 } 664 665 PetscCall(ISGetIndices(isrow, &r)); 666 PetscCall(ISGetIndices(isicol, &ic)); 667 PetscCall(PetscMalloc1(n + 1, &rtmp)); 668 ics = ic; 669 670 do { 671 sctx.newshift = PETSC_FALSE; 672 for (i = 0; i < n; i++) { 673 nz = bi[i + 1] - bi[i]; 674 bjtmp = bj + bi[i]; 675 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 676 677 /* load in initial (unfactored row) */ 678 nz = ai[r[i] + 1] - ai[r[i]]; 679 ajtmp = aj + ai[r[i]]; 680 v = aa + ai[r[i]]; 681 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 682 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 683 684 row = *bjtmp++; 685 while (row < i) { 686 pc = rtmp + row; 687 if (*pc != 0.0) { 688 pv = b->a + diag_offset[row]; 689 pj = b->j + diag_offset[row] + 1; 690 multiplier = *pc / *pv++; 691 *pc = multiplier; 692 nz = bi[row + 1] - diag_offset[row] - 1; 693 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 694 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 695 } 696 row = *bjtmp++; 697 } 698 /* finished row so stick it into b->a */ 699 pv = b->a + bi[i]; 700 pj = b->j + bi[i]; 701 nz = bi[i + 1] - bi[i]; 702 diag = diag_offset[i] - bi[i]; 703 rs = 0.0; 704 for (j = 0; j < nz; j++) { 705 pv[j] = rtmp[pj[j]]; 706 rs += PetscAbsScalar(pv[j]); 707 } 708 rs -= PetscAbsScalar(pv[diag]); 709 710 sctx.rs = rs; 711 sctx.pv = pv[diag]; 712 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 713 if (sctx.newshift) break; 714 pv[diag] = sctx.pv; 715 } 716 717 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 718 /* 719 * if no shift in this attempt & shifting & started shifting & can refine, 720 * then try lower shift 721 */ 722 sctx.shift_hi = sctx.shift_fraction; 723 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 724 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 725 sctx.newshift = PETSC_TRUE; 726 sctx.nshift++; 727 } 728 } while (sctx.newshift); 729 730 /* invert diagonal entries for simpler triangular solves */ 731 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 732 PetscCall(PetscFree(rtmp)); 733 PetscCall(ISRestoreIndices(isicol, &ic)); 734 PetscCall(ISRestoreIndices(isrow, &r)); 735 736 PetscCall(ISIdentity(isrow, &row_identity)); 737 PetscCall(ISIdentity(isicol, &col_identity)); 738 if (row_identity && col_identity) { 739 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 740 } else { 741 C->ops->solve = MatSolve_SeqAIJ_inplace; 742 } 743 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 744 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 745 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 746 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 747 748 C->assembled = PETSC_TRUE; 749 C->preallocated = PETSC_TRUE; 750 751 PetscCall(PetscLogFlops(C->cmap->n)); 752 if (sctx.nshift) { 753 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 754 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 755 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 756 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 757 } 758 } 759 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 760 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 761 762 PetscCall(MatSeqAIJCheckInode(C)); 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 767 768 /* 769 This routine implements inplace ILU(0) with row or/and column permutations. 770 Input: 771 A - original matrix 772 Output; 773 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 774 a->j (col index) is permuted by the inverse of colperm, then sorted 775 a->a reordered accordingly with a->j 776 a->diag (ptr to diagonal elements) is updated. 777 */ 778 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 779 { 780 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 781 IS isrow = a->row, isicol = a->icol; 782 const PetscInt *r, *ic, *ics; 783 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 784 PetscInt *ajtmp, nz, row; 785 PetscInt *diag = a->diag, nbdiag, *pj; 786 PetscScalar *rtmp, *pc, multiplier, d; 787 MatScalar *pv, *v; 788 PetscReal rs; 789 FactorShiftCtx sctx; 790 const MatScalar *aa = a->a, *vtmp; 791 792 PetscFunctionBegin; 793 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 794 795 /* MatPivotSetUp(): initialize shift context sctx */ 796 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 797 798 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 799 const PetscInt *ddiag = a->diag; 800 sctx.shift_top = info->zeropivot; 801 for (i = 0; i < n; i++) { 802 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 803 d = (aa)[ddiag[i]]; 804 rs = -PetscAbsScalar(d) - PetscRealPart(d); 805 vtmp = aa + ai[i]; 806 nz = ai[i + 1] - ai[i]; 807 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 808 if (rs > sctx.shift_top) sctx.shift_top = rs; 809 } 810 sctx.shift_top *= 1.1; 811 sctx.nshift_max = 5; 812 sctx.shift_lo = 0.; 813 sctx.shift_hi = 1.; 814 } 815 816 PetscCall(ISGetIndices(isrow, &r)); 817 PetscCall(ISGetIndices(isicol, &ic)); 818 PetscCall(PetscMalloc1(n + 1, &rtmp)); 819 PetscCall(PetscArrayzero(rtmp, n + 1)); 820 ics = ic; 821 822 #if defined(MV) 823 sctx.shift_top = 0.; 824 sctx.nshift_max = 0; 825 sctx.shift_lo = 0.; 826 sctx.shift_hi = 0.; 827 sctx.shift_fraction = 0.; 828 829 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 830 sctx.shift_top = 0.; 831 for (i = 0; i < n; i++) { 832 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 833 d = (a->a)[diag[i]]; 834 rs = -PetscAbsScalar(d) - PetscRealPart(d); 835 v = a->a + ai[i]; 836 nz = ai[i + 1] - ai[i]; 837 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 838 if (rs > sctx.shift_top) sctx.shift_top = rs; 839 } 840 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 841 sctx.shift_top *= 1.1; 842 sctx.nshift_max = 5; 843 sctx.shift_lo = 0.; 844 sctx.shift_hi = 1.; 845 } 846 847 sctx.shift_amount = 0.; 848 sctx.nshift = 0; 849 #endif 850 851 do { 852 sctx.newshift = PETSC_FALSE; 853 for (i = 0; i < n; i++) { 854 /* load in initial unfactored row */ 855 nz = ai[r[i] + 1] - ai[r[i]]; 856 ajtmp = aj + ai[r[i]]; 857 v = a->a + ai[r[i]]; 858 /* sort permuted ajtmp and values v accordingly */ 859 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 860 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 861 862 diag[r[i]] = ai[r[i]]; 863 for (j = 0; j < nz; j++) { 864 rtmp[ajtmp[j]] = v[j]; 865 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 866 } 867 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 868 869 row = *ajtmp++; 870 while (row < i) { 871 pc = rtmp + row; 872 if (*pc != 0.0) { 873 pv = a->a + diag[r[row]]; 874 pj = aj + diag[r[row]] + 1; 875 876 multiplier = *pc / *pv++; 877 *pc = multiplier; 878 nz = ai[r[row] + 1] - diag[r[row]] - 1; 879 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 880 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 881 } 882 row = *ajtmp++; 883 } 884 /* finished row so overwrite it onto a->a */ 885 pv = a->a + ai[r[i]]; 886 pj = aj + ai[r[i]]; 887 nz = ai[r[i] + 1] - ai[r[i]]; 888 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 889 890 rs = 0.0; 891 for (j = 0; j < nz; j++) { 892 pv[j] = rtmp[pj[j]]; 893 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 894 } 895 896 sctx.rs = rs; 897 sctx.pv = pv[nbdiag]; 898 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 899 if (sctx.newshift) break; 900 pv[nbdiag] = sctx.pv; 901 } 902 903 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 904 /* 905 * if no shift in this attempt & shifting & started shifting & can refine, 906 * then try lower shift 907 */ 908 sctx.shift_hi = sctx.shift_fraction; 909 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 910 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 911 sctx.newshift = PETSC_TRUE; 912 sctx.nshift++; 913 } 914 } while (sctx.newshift); 915 916 /* invert diagonal entries for simpler triangular solves */ 917 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 918 919 PetscCall(PetscFree(rtmp)); 920 PetscCall(ISRestoreIndices(isicol, &ic)); 921 PetscCall(ISRestoreIndices(isrow, &r)); 922 923 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 924 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 925 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 926 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 927 928 A->assembled = PETSC_TRUE; 929 A->preallocated = PETSC_TRUE; 930 931 PetscCall(PetscLogFlops(A->cmap->n)); 932 if (sctx.nshift) { 933 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 934 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 935 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 936 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 937 } 938 } 939 PetscFunctionReturn(PETSC_SUCCESS); 940 } 941 942 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 943 { 944 Mat C; 945 946 PetscFunctionBegin; 947 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 948 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 949 PetscCall(MatLUFactorNumeric(C, A, info)); 950 951 A->ops->solve = C->ops->solve; 952 A->ops->solvetranspose = C->ops->solvetranspose; 953 954 PetscCall(MatHeaderMerge(A, &C)); 955 PetscFunctionReturn(PETSC_SUCCESS); 956 } 957 958 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 959 { 960 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 961 IS iscol = a->col, isrow = a->row; 962 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 963 PetscInt nz; 964 const PetscInt *rout, *cout, *r, *c; 965 PetscScalar *x, *tmp, *tmps, sum; 966 const PetscScalar *b; 967 const MatScalar *aa = a->a, *v; 968 969 PetscFunctionBegin; 970 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 971 972 PetscCall(VecGetArrayRead(bb, &b)); 973 PetscCall(VecGetArrayWrite(xx, &x)); 974 tmp = a->solve_work; 975 976 PetscCall(ISGetIndices(isrow, &rout)); 977 r = rout; 978 PetscCall(ISGetIndices(iscol, &cout)); 979 c = cout + (n - 1); 980 981 /* forward solve the lower triangular */ 982 tmp[0] = b[*r++]; 983 tmps = tmp; 984 for (i = 1; i < n; i++) { 985 v = aa + ai[i]; 986 vi = aj + ai[i]; 987 nz = a->diag[i] - ai[i]; 988 sum = b[*r++]; 989 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 990 tmp[i] = sum; 991 } 992 993 /* backward solve the upper triangular */ 994 for (i = n - 1; i >= 0; i--) { 995 v = aa + a->diag[i] + 1; 996 vi = aj + a->diag[i] + 1; 997 nz = ai[i + 1] - a->diag[i] - 1; 998 sum = tmp[i]; 999 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1000 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 1001 } 1002 1003 PetscCall(ISRestoreIndices(isrow, &rout)); 1004 PetscCall(ISRestoreIndices(iscol, &cout)); 1005 PetscCall(VecRestoreArrayRead(bb, &b)); 1006 PetscCall(VecRestoreArrayWrite(xx, &x)); 1007 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1008 PetscFunctionReturn(PETSC_SUCCESS); 1009 } 1010 1011 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1012 { 1013 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1014 IS iscol = a->col, isrow = a->row; 1015 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1016 PetscInt nz, neq, ldb, ldx; 1017 const PetscInt *rout, *cout, *r, *c; 1018 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1019 const PetscScalar *b, *aa = a->a, *v; 1020 PetscBool isdense; 1021 1022 PetscFunctionBegin; 1023 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1024 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1025 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1026 if (X != B) { 1027 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1028 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1029 } 1030 PetscCall(MatDenseGetArrayRead(B, &b)); 1031 PetscCall(MatDenseGetLDA(B, &ldb)); 1032 PetscCall(MatDenseGetArray(X, &x)); 1033 PetscCall(MatDenseGetLDA(X, &ldx)); 1034 PetscCall(ISGetIndices(isrow, &rout)); 1035 r = rout; 1036 PetscCall(ISGetIndices(iscol, &cout)); 1037 c = cout; 1038 for (neq = 0; neq < B->cmap->n; neq++) { 1039 /* forward solve the lower triangular */ 1040 tmp[0] = b[r[0]]; 1041 tmps = tmp; 1042 for (i = 1; i < n; i++) { 1043 v = aa + ai[i]; 1044 vi = aj + ai[i]; 1045 nz = a->diag[i] - ai[i]; 1046 sum = b[r[i]]; 1047 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1048 tmp[i] = sum; 1049 } 1050 /* backward solve the upper triangular */ 1051 for (i = n - 1; i >= 0; i--) { 1052 v = aa + a->diag[i] + 1; 1053 vi = aj + a->diag[i] + 1; 1054 nz = ai[i + 1] - a->diag[i] - 1; 1055 sum = tmp[i]; 1056 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1057 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1058 } 1059 b += ldb; 1060 x += ldx; 1061 } 1062 PetscCall(ISRestoreIndices(isrow, &rout)); 1063 PetscCall(ISRestoreIndices(iscol, &cout)); 1064 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1065 PetscCall(MatDenseRestoreArray(X, &x)); 1066 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1071 { 1072 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1073 IS iscol = a->col, isrow = a->row; 1074 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1075 PetscInt nz, neq, ldb, ldx; 1076 const PetscInt *rout, *cout, *r, *c; 1077 PetscScalar *x, *tmp = a->solve_work, sum; 1078 const PetscScalar *b, *aa = a->a, *v; 1079 PetscBool isdense; 1080 1081 PetscFunctionBegin; 1082 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1083 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1084 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1085 if (X != B) { 1086 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1087 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1088 } 1089 PetscCall(MatDenseGetArrayRead(B, &b)); 1090 PetscCall(MatDenseGetLDA(B, &ldb)); 1091 PetscCall(MatDenseGetArray(X, &x)); 1092 PetscCall(MatDenseGetLDA(X, &ldx)); 1093 PetscCall(ISGetIndices(isrow, &rout)); 1094 r = rout; 1095 PetscCall(ISGetIndices(iscol, &cout)); 1096 c = cout; 1097 for (neq = 0; neq < B->cmap->n; neq++) { 1098 /* forward solve the lower triangular */ 1099 tmp[0] = b[r[0]]; 1100 v = aa; 1101 vi = aj; 1102 for (i = 1; i < n; i++) { 1103 nz = ai[i + 1] - ai[i]; 1104 sum = b[r[i]]; 1105 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1106 tmp[i] = sum; 1107 v += nz; 1108 vi += nz; 1109 } 1110 /* backward solve the upper triangular */ 1111 for (i = n - 1; i >= 0; i--) { 1112 v = aa + adiag[i + 1] + 1; 1113 vi = aj + adiag[i + 1] + 1; 1114 nz = adiag[i] - adiag[i + 1] - 1; 1115 sum = tmp[i]; 1116 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1117 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1118 } 1119 b += ldb; 1120 x += ldx; 1121 } 1122 PetscCall(ISRestoreIndices(isrow, &rout)); 1123 PetscCall(ISRestoreIndices(iscol, &cout)); 1124 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1125 PetscCall(MatDenseRestoreArray(X, &x)); 1126 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1127 PetscFunctionReturn(PETSC_SUCCESS); 1128 } 1129 1130 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1131 { 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1133 IS iscol = a->col, isrow = a->row; 1134 const PetscInt *r, *c, *rout, *cout; 1135 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1136 PetscInt nz, row; 1137 PetscScalar *x, *tmp, *tmps, sum; 1138 const PetscScalar *b; 1139 const MatScalar *aa = a->a, *v; 1140 1141 PetscFunctionBegin; 1142 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1143 1144 PetscCall(VecGetArrayRead(bb, &b)); 1145 PetscCall(VecGetArrayWrite(xx, &x)); 1146 tmp = a->solve_work; 1147 1148 PetscCall(ISGetIndices(isrow, &rout)); 1149 r = rout; 1150 PetscCall(ISGetIndices(iscol, &cout)); 1151 c = cout + (n - 1); 1152 1153 /* forward solve the lower triangular */ 1154 tmp[0] = b[*r++]; 1155 tmps = tmp; 1156 for (row = 1; row < n; row++) { 1157 i = rout[row]; /* permuted row */ 1158 v = aa + ai[i]; 1159 vi = aj + ai[i]; 1160 nz = a->diag[i] - ai[i]; 1161 sum = b[*r++]; 1162 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1163 tmp[row] = sum; 1164 } 1165 1166 /* backward solve the upper triangular */ 1167 for (row = n - 1; row >= 0; row--) { 1168 i = rout[row]; /* permuted row */ 1169 v = aa + a->diag[i] + 1; 1170 vi = aj + a->diag[i] + 1; 1171 nz = ai[i + 1] - a->diag[i] - 1; 1172 sum = tmp[row]; 1173 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1174 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1175 } 1176 1177 PetscCall(ISRestoreIndices(isrow, &rout)); 1178 PetscCall(ISRestoreIndices(iscol, &cout)); 1179 PetscCall(VecRestoreArrayRead(bb, &b)); 1180 PetscCall(VecRestoreArrayWrite(xx, &x)); 1181 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1186 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1187 { 1188 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1189 PetscInt n = A->rmap->n; 1190 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1191 PetscScalar *x; 1192 const PetscScalar *b; 1193 const MatScalar *aa = a->a; 1194 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1195 PetscInt adiag_i, i, nz, ai_i; 1196 const PetscInt *vi; 1197 const MatScalar *v; 1198 PetscScalar sum; 1199 #endif 1200 1201 PetscFunctionBegin; 1202 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1203 1204 PetscCall(VecGetArrayRead(bb, &b)); 1205 PetscCall(VecGetArrayWrite(xx, &x)); 1206 1207 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1208 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1209 #else 1210 /* forward solve the lower triangular */ 1211 x[0] = b[0]; 1212 for (i = 1; i < n; i++) { 1213 ai_i = ai[i]; 1214 v = aa + ai_i; 1215 vi = aj + ai_i; 1216 nz = adiag[i] - ai_i; 1217 sum = b[i]; 1218 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1219 x[i] = sum; 1220 } 1221 1222 /* backward solve the upper triangular */ 1223 for (i = n - 1; i >= 0; i--) { 1224 adiag_i = adiag[i]; 1225 v = aa + adiag_i + 1; 1226 vi = aj + adiag_i + 1; 1227 nz = ai[i + 1] - adiag_i - 1; 1228 sum = x[i]; 1229 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1230 x[i] = sum * aa[adiag_i]; 1231 } 1232 #endif 1233 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1234 PetscCall(VecRestoreArrayRead(bb, &b)); 1235 PetscCall(VecRestoreArrayWrite(xx, &x)); 1236 PetscFunctionReturn(PETSC_SUCCESS); 1237 } 1238 1239 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1240 { 1241 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1242 IS iscol = a->col, isrow = a->row; 1243 PetscInt i, n = A->rmap->n, j; 1244 PetscInt nz; 1245 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1246 PetscScalar *x, *tmp, sum; 1247 const PetscScalar *b; 1248 const MatScalar *aa = a->a, *v; 1249 1250 PetscFunctionBegin; 1251 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1252 1253 PetscCall(VecGetArrayRead(bb, &b)); 1254 PetscCall(VecGetArray(xx, &x)); 1255 tmp = a->solve_work; 1256 1257 PetscCall(ISGetIndices(isrow, &rout)); 1258 r = rout; 1259 PetscCall(ISGetIndices(iscol, &cout)); 1260 c = cout + (n - 1); 1261 1262 /* forward solve the lower triangular */ 1263 tmp[0] = b[*r++]; 1264 for (i = 1; i < n; i++) { 1265 v = aa + ai[i]; 1266 vi = aj + ai[i]; 1267 nz = a->diag[i] - ai[i]; 1268 sum = b[*r++]; 1269 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1270 tmp[i] = sum; 1271 } 1272 1273 /* backward solve the upper triangular */ 1274 for (i = n - 1; i >= 0; i--) { 1275 v = aa + a->diag[i] + 1; 1276 vi = aj + a->diag[i] + 1; 1277 nz = ai[i + 1] - a->diag[i] - 1; 1278 sum = tmp[i]; 1279 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1280 tmp[i] = sum * aa[a->diag[i]]; 1281 x[*c--] += tmp[i]; 1282 } 1283 1284 PetscCall(ISRestoreIndices(isrow, &rout)); 1285 PetscCall(ISRestoreIndices(iscol, &cout)); 1286 PetscCall(VecRestoreArrayRead(bb, &b)); 1287 PetscCall(VecRestoreArray(xx, &x)); 1288 PetscCall(PetscLogFlops(2.0 * a->nz)); 1289 PetscFunctionReturn(PETSC_SUCCESS); 1290 } 1291 1292 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1293 { 1294 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1295 IS iscol = a->col, isrow = a->row; 1296 PetscInt i, n = A->rmap->n, j; 1297 PetscInt nz; 1298 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1299 PetscScalar *x, *tmp, sum; 1300 const PetscScalar *b; 1301 const MatScalar *aa = a->a, *v; 1302 1303 PetscFunctionBegin; 1304 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1305 1306 PetscCall(VecGetArrayRead(bb, &b)); 1307 PetscCall(VecGetArray(xx, &x)); 1308 tmp = a->solve_work; 1309 1310 PetscCall(ISGetIndices(isrow, &rout)); 1311 r = rout; 1312 PetscCall(ISGetIndices(iscol, &cout)); 1313 c = cout; 1314 1315 /* forward solve the lower triangular */ 1316 tmp[0] = b[r[0]]; 1317 v = aa; 1318 vi = aj; 1319 for (i = 1; i < n; i++) { 1320 nz = ai[i + 1] - ai[i]; 1321 sum = b[r[i]]; 1322 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1323 tmp[i] = sum; 1324 v += nz; 1325 vi += nz; 1326 } 1327 1328 /* backward solve the upper triangular */ 1329 v = aa + adiag[n - 1]; 1330 vi = aj + adiag[n - 1]; 1331 for (i = n - 1; i >= 0; i--) { 1332 nz = adiag[i] - adiag[i + 1] - 1; 1333 sum = tmp[i]; 1334 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1335 tmp[i] = sum * v[nz]; 1336 x[c[i]] += tmp[i]; 1337 v += nz + 1; 1338 vi += nz + 1; 1339 } 1340 1341 PetscCall(ISRestoreIndices(isrow, &rout)); 1342 PetscCall(ISRestoreIndices(iscol, &cout)); 1343 PetscCall(VecRestoreArrayRead(bb, &b)); 1344 PetscCall(VecRestoreArray(xx, &x)); 1345 PetscCall(PetscLogFlops(2.0 * a->nz)); 1346 PetscFunctionReturn(PETSC_SUCCESS); 1347 } 1348 1349 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1350 { 1351 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1352 IS iscol = a->col, isrow = a->row; 1353 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1354 PetscInt i, n = A->rmap->n, j; 1355 PetscInt nz; 1356 PetscScalar *x, *tmp, s1; 1357 const MatScalar *aa = a->a, *v; 1358 const PetscScalar *b; 1359 1360 PetscFunctionBegin; 1361 PetscCall(VecGetArrayRead(bb, &b)); 1362 PetscCall(VecGetArrayWrite(xx, &x)); 1363 tmp = a->solve_work; 1364 1365 PetscCall(ISGetIndices(isrow, &rout)); 1366 r = rout; 1367 PetscCall(ISGetIndices(iscol, &cout)); 1368 c = cout; 1369 1370 /* copy the b into temp work space according to permutation */ 1371 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1372 1373 /* forward solve the U^T */ 1374 for (i = 0; i < n; i++) { 1375 v = aa + diag[i]; 1376 vi = aj + diag[i] + 1; 1377 nz = ai[i + 1] - diag[i] - 1; 1378 s1 = tmp[i]; 1379 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1380 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1381 tmp[i] = s1; 1382 } 1383 1384 /* backward solve the L^T */ 1385 for (i = n - 1; i >= 0; i--) { 1386 v = aa + diag[i] - 1; 1387 vi = aj + diag[i] - 1; 1388 nz = diag[i] - ai[i]; 1389 s1 = tmp[i]; 1390 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1391 } 1392 1393 /* copy tmp into x according to permutation */ 1394 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1395 1396 PetscCall(ISRestoreIndices(isrow, &rout)); 1397 PetscCall(ISRestoreIndices(iscol, &cout)); 1398 PetscCall(VecRestoreArrayRead(bb, &b)); 1399 PetscCall(VecRestoreArrayWrite(xx, &x)); 1400 1401 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1402 PetscFunctionReturn(PETSC_SUCCESS); 1403 } 1404 1405 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1406 { 1407 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1408 IS iscol = a->col, isrow = a->row; 1409 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1410 PetscInt i, n = A->rmap->n, j; 1411 PetscInt nz; 1412 PetscScalar *x, *tmp, s1; 1413 const MatScalar *aa = a->a, *v; 1414 const PetscScalar *b; 1415 1416 PetscFunctionBegin; 1417 PetscCall(VecGetArrayRead(bb, &b)); 1418 PetscCall(VecGetArrayWrite(xx, &x)); 1419 tmp = a->solve_work; 1420 1421 PetscCall(ISGetIndices(isrow, &rout)); 1422 r = rout; 1423 PetscCall(ISGetIndices(iscol, &cout)); 1424 c = cout; 1425 1426 /* copy the b into temp work space according to permutation */ 1427 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1428 1429 /* forward solve the U^T */ 1430 for (i = 0; i < n; i++) { 1431 v = aa + adiag[i + 1] + 1; 1432 vi = aj + adiag[i + 1] + 1; 1433 nz = adiag[i] - adiag[i + 1] - 1; 1434 s1 = tmp[i]; 1435 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1436 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1437 tmp[i] = s1; 1438 } 1439 1440 /* backward solve the L^T */ 1441 for (i = n - 1; i >= 0; i--) { 1442 v = aa + ai[i]; 1443 vi = aj + ai[i]; 1444 nz = ai[i + 1] - ai[i]; 1445 s1 = tmp[i]; 1446 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1447 } 1448 1449 /* copy tmp into x according to permutation */ 1450 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1451 1452 PetscCall(ISRestoreIndices(isrow, &rout)); 1453 PetscCall(ISRestoreIndices(iscol, &cout)); 1454 PetscCall(VecRestoreArrayRead(bb, &b)); 1455 PetscCall(VecRestoreArrayWrite(xx, &x)); 1456 1457 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1458 PetscFunctionReturn(PETSC_SUCCESS); 1459 } 1460 1461 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1462 { 1463 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1464 IS iscol = a->col, isrow = a->row; 1465 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1466 PetscInt i, n = A->rmap->n, j; 1467 PetscInt nz; 1468 PetscScalar *x, *tmp, s1; 1469 const MatScalar *aa = a->a, *v; 1470 const PetscScalar *b; 1471 1472 PetscFunctionBegin; 1473 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1474 PetscCall(VecGetArrayRead(bb, &b)); 1475 PetscCall(VecGetArray(xx, &x)); 1476 tmp = a->solve_work; 1477 1478 PetscCall(ISGetIndices(isrow, &rout)); 1479 r = rout; 1480 PetscCall(ISGetIndices(iscol, &cout)); 1481 c = cout; 1482 1483 /* copy the b into temp work space according to permutation */ 1484 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1485 1486 /* forward solve the U^T */ 1487 for (i = 0; i < n; i++) { 1488 v = aa + diag[i]; 1489 vi = aj + diag[i] + 1; 1490 nz = ai[i + 1] - diag[i] - 1; 1491 s1 = tmp[i]; 1492 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1493 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1494 tmp[i] = s1; 1495 } 1496 1497 /* backward solve the L^T */ 1498 for (i = n - 1; i >= 0; i--) { 1499 v = aa + diag[i] - 1; 1500 vi = aj + diag[i] - 1; 1501 nz = diag[i] - ai[i]; 1502 s1 = tmp[i]; 1503 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1504 } 1505 1506 /* copy tmp into x according to permutation */ 1507 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1508 1509 PetscCall(ISRestoreIndices(isrow, &rout)); 1510 PetscCall(ISRestoreIndices(iscol, &cout)); 1511 PetscCall(VecRestoreArrayRead(bb, &b)); 1512 PetscCall(VecRestoreArray(xx, &x)); 1513 1514 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1515 PetscFunctionReturn(PETSC_SUCCESS); 1516 } 1517 1518 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1519 { 1520 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1521 IS iscol = a->col, isrow = a->row; 1522 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1523 PetscInt i, n = A->rmap->n, j; 1524 PetscInt nz; 1525 PetscScalar *x, *tmp, s1; 1526 const MatScalar *aa = a->a, *v; 1527 const PetscScalar *b; 1528 1529 PetscFunctionBegin; 1530 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1531 PetscCall(VecGetArrayRead(bb, &b)); 1532 PetscCall(VecGetArray(xx, &x)); 1533 tmp = a->solve_work; 1534 1535 PetscCall(ISGetIndices(isrow, &rout)); 1536 r = rout; 1537 PetscCall(ISGetIndices(iscol, &cout)); 1538 c = cout; 1539 1540 /* copy the b into temp work space according to permutation */ 1541 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1542 1543 /* forward solve the U^T */ 1544 for (i = 0; i < n; i++) { 1545 v = aa + adiag[i + 1] + 1; 1546 vi = aj + adiag[i + 1] + 1; 1547 nz = adiag[i] - adiag[i + 1] - 1; 1548 s1 = tmp[i]; 1549 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1550 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1551 tmp[i] = s1; 1552 } 1553 1554 /* backward solve the L^T */ 1555 for (i = n - 1; i >= 0; i--) { 1556 v = aa + ai[i]; 1557 vi = aj + ai[i]; 1558 nz = ai[i + 1] - ai[i]; 1559 s1 = tmp[i]; 1560 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1561 } 1562 1563 /* copy tmp into x according to permutation */ 1564 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1565 1566 PetscCall(ISRestoreIndices(isrow, &rout)); 1567 PetscCall(ISRestoreIndices(iscol, &cout)); 1568 PetscCall(VecRestoreArrayRead(bb, &b)); 1569 PetscCall(VecRestoreArray(xx, &x)); 1570 1571 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1572 PetscFunctionReturn(PETSC_SUCCESS); 1573 } 1574 1575 /* 1576 ilu() under revised new data structure. 1577 Factored arrays bj and ba are stored as 1578 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1579 1580 bi=fact->i is an array of size n+1, in which 1581 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1582 bi[n]: points to L(n-1,n-1)+1 1583 1584 bdiag=fact->diag is an array of size n+1,in which 1585 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1586 bdiag[n]: points to entry of U(n-1,0)-1 1587 1588 U(i,:) contains bdiag[i] as its last entry, i.e., 1589 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1590 */ 1591 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1592 { 1593 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1594 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1595 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1596 IS isicol; 1597 1598 PetscFunctionBegin; 1599 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1600 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1601 b = (Mat_SeqAIJ *)(fact)->data; 1602 1603 /* allocate matrix arrays for new data structure */ 1604 PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 1605 1606 b->singlemalloc = PETSC_TRUE; 1607 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1608 bdiag = b->diag; 1609 1610 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1611 1612 /* set bi and bj with new data structure */ 1613 bi = b->i; 1614 bj = b->j; 1615 1616 /* L part */ 1617 bi[0] = 0; 1618 for (i = 0; i < n; i++) { 1619 nz = adiag[i] - ai[i]; 1620 bi[i + 1] = bi[i] + nz; 1621 aj = a->j + ai[i]; 1622 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1623 } 1624 1625 /* U part */ 1626 bdiag[n] = bi[n] - 1; 1627 for (i = n - 1; i >= 0; i--) { 1628 nz = ai[i + 1] - adiag[i] - 1; 1629 aj = a->j + adiag[i] + 1; 1630 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1631 /* diag[i] */ 1632 bj[k++] = i; 1633 bdiag[i] = bdiag[i + 1] + nz + 1; 1634 } 1635 1636 fact->factortype = MAT_FACTOR_ILU; 1637 fact->info.factor_mallocs = 0; 1638 fact->info.fill_ratio_given = info->fill; 1639 fact->info.fill_ratio_needed = 1.0; 1640 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1641 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1642 1643 b = (Mat_SeqAIJ *)(fact)->data; 1644 b->row = isrow; 1645 b->col = iscol; 1646 b->icol = isicol; 1647 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1648 PetscCall(PetscObjectReference((PetscObject)isrow)); 1649 PetscCall(PetscObjectReference((PetscObject)iscol)); 1650 PetscFunctionReturn(PETSC_SUCCESS); 1651 } 1652 1653 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1654 { 1655 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1656 IS isicol; 1657 const PetscInt *r, *ic; 1658 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1659 PetscInt *bi, *cols, nnz, *cols_lvl; 1660 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1661 PetscInt i, levels, diagonal_fill; 1662 PetscBool col_identity, row_identity, missing; 1663 PetscReal f; 1664 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1665 PetscBT lnkbt; 1666 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1667 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1668 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1669 1670 PetscFunctionBegin; 1671 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1672 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1673 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1674 1675 levels = (PetscInt)info->levels; 1676 PetscCall(ISIdentity(isrow, &row_identity)); 1677 PetscCall(ISIdentity(iscol, &col_identity)); 1678 if (!levels && row_identity && col_identity) { 1679 /* special case: ilu(0) with natural ordering */ 1680 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1681 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1682 PetscFunctionReturn(PETSC_SUCCESS); 1683 } 1684 1685 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1686 PetscCall(ISGetIndices(isrow, &r)); 1687 PetscCall(ISGetIndices(isicol, &ic)); 1688 1689 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1690 PetscCall(PetscMalloc1(n + 1, &bi)); 1691 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1692 bi[0] = bdiag[0] = 0; 1693 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1694 1695 /* create a linked list for storing column indices of the active row */ 1696 nlnk = n + 1; 1697 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1698 1699 /* initial FreeSpace size is f*(ai[n]+1) */ 1700 f = info->fill; 1701 diagonal_fill = (PetscInt)info->diagonal_fill; 1702 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1703 current_space = free_space; 1704 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1705 current_space_lvl = free_space_lvl; 1706 for (i = 0; i < n; i++) { 1707 nzi = 0; 1708 /* copy current row into linked list */ 1709 nnz = ai[r[i] + 1] - ai[r[i]]; 1710 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1711 cols = aj + ai[r[i]]; 1712 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1713 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1714 nzi += nlnk; 1715 1716 /* make sure diagonal entry is included */ 1717 if (diagonal_fill && lnk[i] == -1) { 1718 fm = n; 1719 while (lnk[fm] < i) fm = lnk[fm]; 1720 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1721 lnk[fm] = i; 1722 lnk_lvl[i] = 0; 1723 nzi++; 1724 dcount++; 1725 } 1726 1727 /* add pivot rows into the active row */ 1728 nzbd = 0; 1729 prow = lnk[n]; 1730 while (prow < i) { 1731 nnz = bdiag[prow]; 1732 cols = bj_ptr[prow] + nnz + 1; 1733 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1734 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1735 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1736 nzi += nlnk; 1737 prow = lnk[prow]; 1738 nzbd++; 1739 } 1740 bdiag[i] = nzbd; 1741 bi[i + 1] = bi[i] + nzi; 1742 /* if free space is not available, make more free space */ 1743 if (current_space->local_remaining < nzi) { 1744 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1745 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1746 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1747 reallocs++; 1748 } 1749 1750 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1751 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1752 bj_ptr[i] = current_space->array; 1753 bjlvl_ptr[i] = current_space_lvl->array; 1754 1755 /* make sure the active row i has diagonal entry */ 1756 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1757 1758 current_space->array += nzi; 1759 current_space->local_used += nzi; 1760 current_space->local_remaining -= nzi; 1761 current_space_lvl->array += nzi; 1762 current_space_lvl->local_used += nzi; 1763 current_space_lvl->local_remaining -= nzi; 1764 } 1765 1766 PetscCall(ISRestoreIndices(isrow, &r)); 1767 PetscCall(ISRestoreIndices(isicol, &ic)); 1768 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1769 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1770 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1771 1772 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1773 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1774 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1775 1776 #if defined(PETSC_USE_INFO) 1777 { 1778 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1779 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1780 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1781 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1782 PetscCall(PetscInfo(A, "for best performance.\n")); 1783 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1784 } 1785 #endif 1786 /* put together the new matrix */ 1787 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1788 b = (Mat_SeqAIJ *)(fact)->data; 1789 1790 b->free_a = PETSC_TRUE; 1791 b->free_ij = PETSC_TRUE; 1792 b->singlemalloc = PETSC_FALSE; 1793 1794 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 1795 1796 b->j = bj; 1797 b->i = bi; 1798 b->diag = bdiag; 1799 b->ilen = NULL; 1800 b->imax = NULL; 1801 b->row = isrow; 1802 b->col = iscol; 1803 PetscCall(PetscObjectReference((PetscObject)isrow)); 1804 PetscCall(PetscObjectReference((PetscObject)iscol)); 1805 b->icol = isicol; 1806 1807 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1808 /* In b structure: Free imax, ilen, old a, old j. 1809 Allocate bdiag, solve_work, new a, new j */ 1810 b->maxnz = b->nz = bdiag[0] + 1; 1811 1812 (fact)->info.factor_mallocs = reallocs; 1813 (fact)->info.fill_ratio_given = f; 1814 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1815 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1816 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1817 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1818 PetscFunctionReturn(PETSC_SUCCESS); 1819 } 1820 1821 #if 0 1822 // unused 1823 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1824 { 1825 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1826 IS isicol; 1827 const PetscInt *r, *ic; 1828 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1829 PetscInt *bi, *cols, nnz, *cols_lvl; 1830 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1831 PetscInt i, levels, diagonal_fill; 1832 PetscBool col_identity, row_identity; 1833 PetscReal f; 1834 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1835 PetscBT lnkbt; 1836 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1837 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1838 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1839 PetscBool missing; 1840 1841 PetscFunctionBegin; 1842 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1843 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1844 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1845 1846 f = info->fill; 1847 levels = (PetscInt)info->levels; 1848 diagonal_fill = (PetscInt)info->diagonal_fill; 1849 1850 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1851 1852 PetscCall(ISIdentity(isrow, &row_identity)); 1853 PetscCall(ISIdentity(iscol, &col_identity)); 1854 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1855 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1856 1857 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1858 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1859 fact->factortype = MAT_FACTOR_ILU; 1860 (fact)->info.factor_mallocs = 0; 1861 (fact)->info.fill_ratio_given = info->fill; 1862 (fact)->info.fill_ratio_needed = 1.0; 1863 1864 b = (Mat_SeqAIJ *)(fact)->data; 1865 b->row = isrow; 1866 b->col = iscol; 1867 b->icol = isicol; 1868 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1869 PetscCall(PetscObjectReference((PetscObject)isrow)); 1870 PetscCall(PetscObjectReference((PetscObject)iscol)); 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 PetscCall(ISGetIndices(isrow, &r)); 1875 PetscCall(ISGetIndices(isicol, &ic)); 1876 1877 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1878 PetscCall(PetscMalloc1(n + 1, &bi)); 1879 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1880 bi[0] = bdiag[0] = 0; 1881 1882 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1883 1884 /* create a linked list for storing column indices of the active row */ 1885 nlnk = n + 1; 1886 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1887 1888 /* initial FreeSpace size is f*(ai[n]+1) */ 1889 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1890 current_space = free_space; 1891 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1892 current_space_lvl = free_space_lvl; 1893 1894 for (i = 0; i < n; i++) { 1895 nzi = 0; 1896 /* copy current row into linked list */ 1897 nnz = ai[r[i] + 1] - ai[r[i]]; 1898 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1899 cols = aj + ai[r[i]]; 1900 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1901 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1902 nzi += nlnk; 1903 1904 /* make sure diagonal entry is included */ 1905 if (diagonal_fill && lnk[i] == -1) { 1906 fm = n; 1907 while (lnk[fm] < i) fm = lnk[fm]; 1908 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1909 lnk[fm] = i; 1910 lnk_lvl[i] = 0; 1911 nzi++; 1912 dcount++; 1913 } 1914 1915 /* add pivot rows into the active row */ 1916 nzbd = 0; 1917 prow = lnk[n]; 1918 while (prow < i) { 1919 nnz = bdiag[prow]; 1920 cols = bj_ptr[prow] + nnz + 1; 1921 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1922 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1923 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1924 nzi += nlnk; 1925 prow = lnk[prow]; 1926 nzbd++; 1927 } 1928 bdiag[i] = nzbd; 1929 bi[i + 1] = bi[i] + nzi; 1930 1931 /* if free space is not available, make more free space */ 1932 if (current_space->local_remaining < nzi) { 1933 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1934 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1935 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1936 reallocs++; 1937 } 1938 1939 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1940 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1941 bj_ptr[i] = current_space->array; 1942 bjlvl_ptr[i] = current_space_lvl->array; 1943 1944 /* make sure the active row i has diagonal entry */ 1945 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1946 1947 current_space->array += nzi; 1948 current_space->local_used += nzi; 1949 current_space->local_remaining -= nzi; 1950 current_space_lvl->array += nzi; 1951 current_space_lvl->local_used += nzi; 1952 current_space_lvl->local_remaining -= nzi; 1953 } 1954 1955 PetscCall(ISRestoreIndices(isrow, &r)); 1956 PetscCall(ISRestoreIndices(isicol, &ic)); 1957 1958 /* destroy list of free space and other temporary arrays */ 1959 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1960 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 1961 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1962 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1963 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1964 1965 #if defined(PETSC_USE_INFO) 1966 { 1967 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1968 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1969 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1970 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1971 PetscCall(PetscInfo(A, "for best performance.\n")); 1972 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1973 } 1974 #endif 1975 1976 /* put together the new matrix */ 1977 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1978 b = (Mat_SeqAIJ *)(fact)->data; 1979 1980 b->free_a = PETSC_TRUE; 1981 b->free_ij = PETSC_TRUE; 1982 b->singlemalloc = PETSC_FALSE; 1983 1984 PetscCall(PetscMalloc1(bi[n], &b->a)); 1985 b->j = bj; 1986 b->i = bi; 1987 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 1988 b->diag = bdiag; 1989 b->ilen = NULL; 1990 b->imax = NULL; 1991 b->row = isrow; 1992 b->col = iscol; 1993 PetscCall(PetscObjectReference((PetscObject)isrow)); 1994 PetscCall(PetscObjectReference((PetscObject)iscol)); 1995 b->icol = isicol; 1996 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1997 /* In b structure: Free imax, ilen, old a, old j. 1998 Allocate bdiag, solve_work, new a, new j */ 1999 b->maxnz = b->nz = bi[n]; 2000 2001 (fact)->info.factor_mallocs = reallocs; 2002 (fact)->info.fill_ratio_given = f; 2003 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2004 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2005 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 #endif 2009 2010 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2011 { 2012 Mat C = B; 2013 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2014 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2015 IS ip = b->row, iip = b->icol; 2016 const PetscInt *rip, *riip; 2017 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2018 PetscInt *ai = a->i, *aj = a->j; 2019 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2020 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2021 PetscBool perm_identity; 2022 FactorShiftCtx sctx; 2023 PetscReal rs; 2024 MatScalar d, *v; 2025 2026 PetscFunctionBegin; 2027 /* MatPivotSetUp(): initialize shift context sctx */ 2028 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2029 2030 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2031 sctx.shift_top = info->zeropivot; 2032 for (i = 0; i < mbs; i++) { 2033 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2034 d = (aa)[a->diag[i]]; 2035 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2036 v = aa + ai[i]; 2037 nz = ai[i + 1] - ai[i]; 2038 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2039 if (rs > sctx.shift_top) sctx.shift_top = rs; 2040 } 2041 sctx.shift_top *= 1.1; 2042 sctx.nshift_max = 5; 2043 sctx.shift_lo = 0.; 2044 sctx.shift_hi = 1.; 2045 } 2046 2047 PetscCall(ISGetIndices(ip, &rip)); 2048 PetscCall(ISGetIndices(iip, &riip)); 2049 2050 /* allocate working arrays 2051 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2052 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2053 */ 2054 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2055 2056 do { 2057 sctx.newshift = PETSC_FALSE; 2058 2059 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2060 if (mbs) il[0] = 0; 2061 2062 for (k = 0; k < mbs; k++) { 2063 /* zero rtmp */ 2064 nz = bi[k + 1] - bi[k]; 2065 bjtmp = bj + bi[k]; 2066 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2067 2068 /* load in initial unfactored row */ 2069 bval = ba + bi[k]; 2070 jmin = ai[rip[k]]; 2071 jmax = ai[rip[k] + 1]; 2072 for (j = jmin; j < jmax; j++) { 2073 col = riip[aj[j]]; 2074 if (col >= k) { /* only take upper triangular entry */ 2075 rtmp[col] = aa[j]; 2076 *bval++ = 0.0; /* for in-place factorization */ 2077 } 2078 } 2079 /* shift the diagonal of the matrix: ZeropivotApply() */ 2080 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2081 2082 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2083 dk = rtmp[k]; 2084 i = c2r[k]; /* first row to be added to k_th row */ 2085 2086 while (i < k) { 2087 nexti = c2r[i]; /* next row to be added to k_th row */ 2088 2089 /* compute multiplier, update diag(k) and U(i,k) */ 2090 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2091 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2092 dk += uikdi * ba[ili]; /* update diag[k] */ 2093 ba[ili] = uikdi; /* -U(i,k) */ 2094 2095 /* add multiple of row i to k-th row */ 2096 jmin = ili + 1; 2097 jmax = bi[i + 1]; 2098 if (jmin < jmax) { 2099 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2100 /* update il and c2r for row i */ 2101 il[i] = jmin; 2102 j = bj[jmin]; 2103 c2r[i] = c2r[j]; 2104 c2r[j] = i; 2105 } 2106 i = nexti; 2107 } 2108 2109 /* copy data into U(k,:) */ 2110 rs = 0.0; 2111 jmin = bi[k]; 2112 jmax = bi[k + 1] - 1; 2113 if (jmin < jmax) { 2114 for (j = jmin; j < jmax; j++) { 2115 col = bj[j]; 2116 ba[j] = rtmp[col]; 2117 rs += PetscAbsScalar(ba[j]); 2118 } 2119 /* add the k-th row into il and c2r */ 2120 il[k] = jmin; 2121 i = bj[jmin]; 2122 c2r[k] = c2r[i]; 2123 c2r[i] = k; 2124 } 2125 2126 /* MatPivotCheck() */ 2127 sctx.rs = rs; 2128 sctx.pv = dk; 2129 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2130 if (sctx.newshift) break; 2131 dk = sctx.pv; 2132 2133 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2134 } 2135 } while (sctx.newshift); 2136 2137 PetscCall(PetscFree3(rtmp, il, c2r)); 2138 PetscCall(ISRestoreIndices(ip, &rip)); 2139 PetscCall(ISRestoreIndices(iip, &riip)); 2140 2141 PetscCall(ISIdentity(ip, &perm_identity)); 2142 if (perm_identity) { 2143 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2144 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2145 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2146 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2147 } else { 2148 B->ops->solve = MatSolve_SeqSBAIJ_1; 2149 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2150 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2151 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2152 } 2153 2154 C->assembled = PETSC_TRUE; 2155 C->preallocated = PETSC_TRUE; 2156 2157 PetscCall(PetscLogFlops(C->rmap->n)); 2158 2159 /* MatPivotView() */ 2160 if (sctx.nshift) { 2161 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2162 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2163 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2164 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2165 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2166 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2167 } 2168 } 2169 PetscFunctionReturn(PETSC_SUCCESS); 2170 } 2171 2172 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2173 { 2174 Mat C = B; 2175 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2176 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2177 IS ip = b->row, iip = b->icol; 2178 const PetscInt *rip, *riip; 2179 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2180 PetscInt *ai = a->i, *aj = a->j; 2181 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2182 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2183 PetscBool perm_identity; 2184 FactorShiftCtx sctx; 2185 PetscReal rs; 2186 MatScalar d, *v; 2187 2188 PetscFunctionBegin; 2189 /* MatPivotSetUp(): initialize shift context sctx */ 2190 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2191 2192 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2193 sctx.shift_top = info->zeropivot; 2194 for (i = 0; i < mbs; i++) { 2195 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2196 d = (aa)[a->diag[i]]; 2197 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2198 v = aa + ai[i]; 2199 nz = ai[i + 1] - ai[i]; 2200 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2201 if (rs > sctx.shift_top) sctx.shift_top = rs; 2202 } 2203 sctx.shift_top *= 1.1; 2204 sctx.nshift_max = 5; 2205 sctx.shift_lo = 0.; 2206 sctx.shift_hi = 1.; 2207 } 2208 2209 PetscCall(ISGetIndices(ip, &rip)); 2210 PetscCall(ISGetIndices(iip, &riip)); 2211 2212 /* initialization */ 2213 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2214 2215 do { 2216 sctx.newshift = PETSC_FALSE; 2217 2218 for (i = 0; i < mbs; i++) jl[i] = mbs; 2219 il[0] = 0; 2220 2221 for (k = 0; k < mbs; k++) { 2222 /* zero rtmp */ 2223 nz = bi[k + 1] - bi[k]; 2224 bjtmp = bj + bi[k]; 2225 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2226 2227 bval = ba + bi[k]; 2228 /* initialize k-th row by the perm[k]-th row of A */ 2229 jmin = ai[rip[k]]; 2230 jmax = ai[rip[k] + 1]; 2231 for (j = jmin; j < jmax; j++) { 2232 col = riip[aj[j]]; 2233 if (col >= k) { /* only take upper triangular entry */ 2234 rtmp[col] = aa[j]; 2235 *bval++ = 0.0; /* for in-place factorization */ 2236 } 2237 } 2238 /* shift the diagonal of the matrix */ 2239 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2240 2241 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2242 dk = rtmp[k]; 2243 i = jl[k]; /* first row to be added to k_th row */ 2244 2245 while (i < k) { 2246 nexti = jl[i]; /* next row to be added to k_th row */ 2247 2248 /* compute multiplier, update diag(k) and U(i,k) */ 2249 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2250 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2251 dk += uikdi * ba[ili]; 2252 ba[ili] = uikdi; /* -U(i,k) */ 2253 2254 /* add multiple of row i to k-th row */ 2255 jmin = ili + 1; 2256 jmax = bi[i + 1]; 2257 if (jmin < jmax) { 2258 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2259 /* update il and jl for row i */ 2260 il[i] = jmin; 2261 j = bj[jmin]; 2262 jl[i] = jl[j]; 2263 jl[j] = i; 2264 } 2265 i = nexti; 2266 } 2267 2268 /* shift the diagonals when zero pivot is detected */ 2269 /* compute rs=sum of abs(off-diagonal) */ 2270 rs = 0.0; 2271 jmin = bi[k] + 1; 2272 nz = bi[k + 1] - jmin; 2273 bcol = bj + jmin; 2274 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2275 2276 sctx.rs = rs; 2277 sctx.pv = dk; 2278 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2279 if (sctx.newshift) break; 2280 dk = sctx.pv; 2281 2282 /* copy data into U(k,:) */ 2283 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2284 jmin = bi[k] + 1; 2285 jmax = bi[k + 1]; 2286 if (jmin < jmax) { 2287 for (j = jmin; j < jmax; j++) { 2288 col = bj[j]; 2289 ba[j] = rtmp[col]; 2290 } 2291 /* add the k-th row into il and jl */ 2292 il[k] = jmin; 2293 i = bj[jmin]; 2294 jl[k] = jl[i]; 2295 jl[i] = k; 2296 } 2297 } 2298 } while (sctx.newshift); 2299 2300 PetscCall(PetscFree3(rtmp, il, jl)); 2301 PetscCall(ISRestoreIndices(ip, &rip)); 2302 PetscCall(ISRestoreIndices(iip, &riip)); 2303 2304 PetscCall(ISIdentity(ip, &perm_identity)); 2305 if (perm_identity) { 2306 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2307 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2308 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2309 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2310 } else { 2311 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2312 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2313 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2314 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2315 } 2316 2317 C->assembled = PETSC_TRUE; 2318 C->preallocated = PETSC_TRUE; 2319 2320 PetscCall(PetscLogFlops(C->rmap->n)); 2321 if (sctx.nshift) { 2322 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2323 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2324 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2325 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2326 } 2327 } 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 /* 2332 icc() under revised new data structure. 2333 Factored arrays bj and ba are stored as 2334 U(0,:),...,U(i,:),U(n-1,:) 2335 2336 ui=fact->i is an array of size n+1, in which 2337 ui+ 2338 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2339 ui[n]: points to U(n-1,n-1)+1 2340 2341 udiag=fact->diag is an array of size n,in which 2342 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2343 2344 U(i,:) contains udiag[i] as its last entry, i.e., 2345 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2346 */ 2347 2348 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2349 { 2350 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2351 Mat_SeqSBAIJ *b; 2352 PetscBool perm_identity, missing; 2353 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2354 const PetscInt *rip, *riip; 2355 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2356 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2357 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2358 PetscReal fill = info->fill, levels = info->levels; 2359 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2360 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2361 PetscBT lnkbt; 2362 IS iperm; 2363 2364 PetscFunctionBegin; 2365 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2366 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2367 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2368 PetscCall(ISIdentity(perm, &perm_identity)); 2369 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2370 2371 PetscCall(PetscMalloc1(am + 1, &ui)); 2372 PetscCall(PetscMalloc1(am + 1, &udiag)); 2373 ui[0] = 0; 2374 2375 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2376 if (!levels && perm_identity) { 2377 for (i = 0; i < am; i++) { 2378 ncols = ai[i + 1] - a->diag[i]; 2379 ui[i + 1] = ui[i] + ncols; 2380 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2381 } 2382 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2383 cols = uj; 2384 for (i = 0; i < am; i++) { 2385 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2386 ncols = ai[i + 1] - a->diag[i] - 1; 2387 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2388 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2389 } 2390 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2391 PetscCall(ISGetIndices(iperm, &riip)); 2392 PetscCall(ISGetIndices(perm, &rip)); 2393 2394 /* initialization */ 2395 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2396 2397 /* jl: linked list for storing indices of the pivot rows 2398 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2399 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2400 for (i = 0; i < am; i++) { 2401 jl[i] = am; 2402 il[i] = 0; 2403 } 2404 2405 /* create and initialize a linked list for storing column indices of the active row k */ 2406 nlnk = am + 1; 2407 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2408 2409 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2410 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2411 current_space = free_space; 2412 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2413 current_space_lvl = free_space_lvl; 2414 2415 for (k = 0; k < am; k++) { /* for each active row k */ 2416 /* initialize lnk by the column indices of row rip[k] of A */ 2417 nzk = 0; 2418 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2419 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2420 ncols_upper = 0; 2421 for (j = 0; j < ncols; j++) { 2422 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2423 if (riip[i] >= k) { /* only take upper triangular entry */ 2424 ajtmp[ncols_upper] = i; 2425 ncols_upper++; 2426 } 2427 } 2428 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2429 nzk += nlnk; 2430 2431 /* update lnk by computing fill-in for each pivot row to be merged in */ 2432 prow = jl[k]; /* 1st pivot row */ 2433 2434 while (prow < k) { 2435 nextprow = jl[prow]; 2436 2437 /* merge prow into k-th row */ 2438 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2439 jmax = ui[prow + 1]; 2440 ncols = jmax - jmin; 2441 i = jmin - ui[prow]; 2442 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2443 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2444 j = *(uj - 1); 2445 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2446 nzk += nlnk; 2447 2448 /* update il and jl for prow */ 2449 if (jmin < jmax) { 2450 il[prow] = jmin; 2451 j = *cols; 2452 jl[prow] = jl[j]; 2453 jl[j] = prow; 2454 } 2455 prow = nextprow; 2456 } 2457 2458 /* if free space is not available, make more free space */ 2459 if (current_space->local_remaining < nzk) { 2460 i = am - k + 1; /* num of unfactored rows */ 2461 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2462 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2463 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2464 reallocs++; 2465 } 2466 2467 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2468 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2469 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2470 2471 /* add the k-th row into il and jl */ 2472 if (nzk > 1) { 2473 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2474 jl[k] = jl[i]; 2475 jl[i] = k; 2476 il[k] = ui[k] + 1; 2477 } 2478 uj_ptr[k] = current_space->array; 2479 uj_lvl_ptr[k] = current_space_lvl->array; 2480 2481 current_space->array += nzk; 2482 current_space->local_used += nzk; 2483 current_space->local_remaining -= nzk; 2484 2485 current_space_lvl->array += nzk; 2486 current_space_lvl->local_used += nzk; 2487 current_space_lvl->local_remaining -= nzk; 2488 2489 ui[k + 1] = ui[k] + nzk; 2490 } 2491 2492 PetscCall(ISRestoreIndices(perm, &rip)); 2493 PetscCall(ISRestoreIndices(iperm, &riip)); 2494 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2495 PetscCall(PetscFree(ajtmp)); 2496 2497 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2498 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2499 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2500 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2501 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2502 2503 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2504 2505 /* put together the new matrix in MATSEQSBAIJ format */ 2506 b = (Mat_SeqSBAIJ *)(fact)->data; 2507 b->singlemalloc = PETSC_FALSE; 2508 2509 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2510 2511 b->j = uj; 2512 b->i = ui; 2513 b->diag = udiag; 2514 b->free_diag = PETSC_TRUE; 2515 b->ilen = NULL; 2516 b->imax = NULL; 2517 b->row = perm; 2518 b->col = perm; 2519 PetscCall(PetscObjectReference((PetscObject)perm)); 2520 PetscCall(PetscObjectReference((PetscObject)perm)); 2521 b->icol = iperm; 2522 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2523 2524 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2525 2526 b->maxnz = b->nz = ui[am]; 2527 b->free_a = PETSC_TRUE; 2528 b->free_ij = PETSC_TRUE; 2529 2530 fact->info.factor_mallocs = reallocs; 2531 fact->info.fill_ratio_given = fill; 2532 if (ai[am] != 0) { 2533 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2534 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2535 } else { 2536 fact->info.fill_ratio_needed = 0.0; 2537 } 2538 #if defined(PETSC_USE_INFO) 2539 if (ai[am] != 0) { 2540 PetscReal af = fact->info.fill_ratio_needed; 2541 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2542 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2543 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2544 } else { 2545 PetscCall(PetscInfo(A, "Empty matrix\n")); 2546 } 2547 #endif 2548 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2549 PetscFunctionReturn(PETSC_SUCCESS); 2550 } 2551 2552 #if 0 2553 // unused 2554 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2555 { 2556 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2557 Mat_SeqSBAIJ *b; 2558 PetscBool perm_identity, missing; 2559 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2560 const PetscInt *rip, *riip; 2561 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2562 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2563 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2564 PetscReal fill = info->fill, levels = info->levels; 2565 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2566 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2567 PetscBT lnkbt; 2568 IS iperm; 2569 2570 PetscFunctionBegin; 2571 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2572 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2573 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2574 PetscCall(ISIdentity(perm, &perm_identity)); 2575 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2576 2577 PetscCall(PetscMalloc1(am + 1, &ui)); 2578 PetscCall(PetscMalloc1(am + 1, &udiag)); 2579 ui[0] = 0; 2580 2581 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2582 if (!levels && perm_identity) { 2583 for (i = 0; i < am; i++) { 2584 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2585 udiag[i] = ui[i]; 2586 } 2587 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2588 cols = uj; 2589 for (i = 0; i < am; i++) { 2590 aj = a->j + a->diag[i]; 2591 ncols = ui[i + 1] - ui[i]; 2592 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2593 } 2594 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2595 PetscCall(ISGetIndices(iperm, &riip)); 2596 PetscCall(ISGetIndices(perm, &rip)); 2597 2598 /* initialization */ 2599 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2600 2601 /* jl: linked list for storing indices of the pivot rows 2602 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2603 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2604 for (i = 0; i < am; i++) { 2605 jl[i] = am; 2606 il[i] = 0; 2607 } 2608 2609 /* create and initialize a linked list for storing column indices of the active row k */ 2610 nlnk = am + 1; 2611 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2612 2613 /* initial FreeSpace size is fill*(ai[am]+1) */ 2614 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2615 current_space = free_space; 2616 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2617 current_space_lvl = free_space_lvl; 2618 2619 for (k = 0; k < am; k++) { /* for each active row k */ 2620 /* initialize lnk by the column indices of row rip[k] of A */ 2621 nzk = 0; 2622 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2623 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2624 ncols_upper = 0; 2625 for (j = 0; j < ncols; j++) { 2626 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2627 if (riip[i] >= k) { /* only take upper triangular entry */ 2628 ajtmp[ncols_upper] = i; 2629 ncols_upper++; 2630 } 2631 } 2632 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2633 nzk += nlnk; 2634 2635 /* update lnk by computing fill-in for each pivot row to be merged in */ 2636 prow = jl[k]; /* 1st pivot row */ 2637 2638 while (prow < k) { 2639 nextprow = jl[prow]; 2640 2641 /* merge prow into k-th row */ 2642 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2643 jmax = ui[prow + 1]; 2644 ncols = jmax - jmin; 2645 i = jmin - ui[prow]; 2646 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2647 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2648 j = *(uj - 1); 2649 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2650 nzk += nlnk; 2651 2652 /* update il and jl for prow */ 2653 if (jmin < jmax) { 2654 il[prow] = jmin; 2655 j = *cols; 2656 jl[prow] = jl[j]; 2657 jl[j] = prow; 2658 } 2659 prow = nextprow; 2660 } 2661 2662 /* if free space is not available, make more free space */ 2663 if (current_space->local_remaining < nzk) { 2664 i = am - k + 1; /* num of unfactored rows */ 2665 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2666 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2667 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2668 reallocs++; 2669 } 2670 2671 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2672 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2673 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2674 2675 /* add the k-th row into il and jl */ 2676 if (nzk > 1) { 2677 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2678 jl[k] = jl[i]; 2679 jl[i] = k; 2680 il[k] = ui[k] + 1; 2681 } 2682 uj_ptr[k] = current_space->array; 2683 uj_lvl_ptr[k] = current_space_lvl->array; 2684 2685 current_space->array += nzk; 2686 current_space->local_used += nzk; 2687 current_space->local_remaining -= nzk; 2688 2689 current_space_lvl->array += nzk; 2690 current_space_lvl->local_used += nzk; 2691 current_space_lvl->local_remaining -= nzk; 2692 2693 ui[k + 1] = ui[k] + nzk; 2694 } 2695 2696 #if defined(PETSC_USE_INFO) 2697 if (ai[am] != 0) { 2698 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2699 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2700 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2701 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2702 } else { 2703 PetscCall(PetscInfo(A, "Empty matrix\n")); 2704 } 2705 #endif 2706 2707 PetscCall(ISRestoreIndices(perm, &rip)); 2708 PetscCall(ISRestoreIndices(iperm, &riip)); 2709 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2710 PetscCall(PetscFree(ajtmp)); 2711 2712 /* destroy list of free space and other temporary array(s) */ 2713 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2714 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2715 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2716 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2717 2718 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2719 2720 /* put together the new matrix in MATSEQSBAIJ format */ 2721 2722 b = (Mat_SeqSBAIJ *)fact->data; 2723 b->singlemalloc = PETSC_FALSE; 2724 2725 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2726 2727 b->j = uj; 2728 b->i = ui; 2729 b->diag = udiag; 2730 b->free_diag = PETSC_TRUE; 2731 b->ilen = NULL; 2732 b->imax = NULL; 2733 b->row = perm; 2734 b->col = perm; 2735 2736 PetscCall(PetscObjectReference((PetscObject)perm)); 2737 PetscCall(PetscObjectReference((PetscObject)perm)); 2738 2739 b->icol = iperm; 2740 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2741 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2742 b->maxnz = b->nz = ui[am]; 2743 b->free_a = PETSC_TRUE; 2744 b->free_ij = PETSC_TRUE; 2745 2746 fact->info.factor_mallocs = reallocs; 2747 fact->info.fill_ratio_given = fill; 2748 if (ai[am] != 0) { 2749 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2750 } else { 2751 fact->info.fill_ratio_needed = 0.0; 2752 } 2753 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2754 PetscFunctionReturn(PETSC_SUCCESS); 2755 } 2756 #endif 2757 2758 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2759 { 2760 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2761 Mat_SeqSBAIJ *b; 2762 PetscBool perm_identity, missing; 2763 PetscReal fill = info->fill; 2764 const PetscInt *rip, *riip; 2765 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2766 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2767 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2768 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2769 PetscBT lnkbt; 2770 IS iperm; 2771 2772 PetscFunctionBegin; 2773 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2774 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2775 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2776 2777 /* check whether perm is the identity mapping */ 2778 PetscCall(ISIdentity(perm, &perm_identity)); 2779 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2780 PetscCall(ISGetIndices(iperm, &riip)); 2781 PetscCall(ISGetIndices(perm, &rip)); 2782 2783 /* initialization */ 2784 PetscCall(PetscMalloc1(am + 1, &ui)); 2785 PetscCall(PetscMalloc1(am + 1, &udiag)); 2786 ui[0] = 0; 2787 2788 /* jl: linked list for storing indices of the pivot rows 2789 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2790 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2791 for (i = 0; i < am; i++) { 2792 jl[i] = am; 2793 il[i] = 0; 2794 } 2795 2796 /* create and initialize a linked list for storing column indices of the active row k */ 2797 nlnk = am + 1; 2798 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2799 2800 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2801 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2802 current_space = free_space; 2803 2804 for (k = 0; k < am; k++) { /* for each active row k */ 2805 /* initialize lnk by the column indices of row rip[k] of A */ 2806 nzk = 0; 2807 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2808 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2809 ncols_upper = 0; 2810 for (j = 0; j < ncols; j++) { 2811 i = riip[*(aj + ai[rip[k]] + j)]; 2812 if (i >= k) { /* only take upper triangular entry */ 2813 cols[ncols_upper] = i; 2814 ncols_upper++; 2815 } 2816 } 2817 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2818 nzk += nlnk; 2819 2820 /* update lnk by computing fill-in for each pivot row to be merged in */ 2821 prow = jl[k]; /* 1st pivot row */ 2822 2823 while (prow < k) { 2824 nextprow = jl[prow]; 2825 /* merge prow into k-th row */ 2826 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2827 jmax = ui[prow + 1]; 2828 ncols = jmax - jmin; 2829 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2830 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2831 nzk += nlnk; 2832 2833 /* update il and jl for prow */ 2834 if (jmin < jmax) { 2835 il[prow] = jmin; 2836 j = *uj_ptr; 2837 jl[prow] = jl[j]; 2838 jl[j] = prow; 2839 } 2840 prow = nextprow; 2841 } 2842 2843 /* if free space is not available, make more free space */ 2844 if (current_space->local_remaining < nzk) { 2845 i = am - k + 1; /* num of unfactored rows */ 2846 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2847 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2848 reallocs++; 2849 } 2850 2851 /* copy data into free space, then initialize lnk */ 2852 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2853 2854 /* add the k-th row into il and jl */ 2855 if (nzk > 1) { 2856 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2857 jl[k] = jl[i]; 2858 jl[i] = k; 2859 il[k] = ui[k] + 1; 2860 } 2861 ui_ptr[k] = current_space->array; 2862 2863 current_space->array += nzk; 2864 current_space->local_used += nzk; 2865 current_space->local_remaining -= nzk; 2866 2867 ui[k + 1] = ui[k] + nzk; 2868 } 2869 2870 PetscCall(ISRestoreIndices(perm, &rip)); 2871 PetscCall(ISRestoreIndices(iperm, &riip)); 2872 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2873 2874 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2875 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2876 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2877 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2878 2879 /* put together the new matrix in MATSEQSBAIJ format */ 2880 2881 b = (Mat_SeqSBAIJ *)fact->data; 2882 b->singlemalloc = PETSC_FALSE; 2883 b->free_a = PETSC_TRUE; 2884 b->free_ij = PETSC_TRUE; 2885 2886 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2887 2888 b->j = uj; 2889 b->i = ui; 2890 b->diag = udiag; 2891 b->free_diag = PETSC_TRUE; 2892 b->ilen = NULL; 2893 b->imax = NULL; 2894 b->row = perm; 2895 b->col = perm; 2896 2897 PetscCall(PetscObjectReference((PetscObject)perm)); 2898 PetscCall(PetscObjectReference((PetscObject)perm)); 2899 2900 b->icol = iperm; 2901 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2902 2903 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2904 2905 b->maxnz = b->nz = ui[am]; 2906 2907 fact->info.factor_mallocs = reallocs; 2908 fact->info.fill_ratio_given = fill; 2909 if (ai[am] != 0) { 2910 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2911 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2912 } else { 2913 fact->info.fill_ratio_needed = 0.0; 2914 } 2915 #if defined(PETSC_USE_INFO) 2916 if (ai[am] != 0) { 2917 PetscReal af = fact->info.fill_ratio_needed; 2918 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2919 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2920 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2921 } else { 2922 PetscCall(PetscInfo(A, "Empty matrix\n")); 2923 } 2924 #endif 2925 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 #if 0 2930 // unused 2931 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2932 { 2933 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2934 Mat_SeqSBAIJ *b; 2935 PetscBool perm_identity, missing; 2936 PetscReal fill = info->fill; 2937 const PetscInt *rip, *riip; 2938 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2939 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2940 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2941 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2942 PetscBT lnkbt; 2943 IS iperm; 2944 2945 PetscFunctionBegin; 2946 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2947 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2948 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2949 2950 /* check whether perm is the identity mapping */ 2951 PetscCall(ISIdentity(perm, &perm_identity)); 2952 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2953 PetscCall(ISGetIndices(iperm, &riip)); 2954 PetscCall(ISGetIndices(perm, &rip)); 2955 2956 /* initialization */ 2957 PetscCall(PetscMalloc1(am + 1, &ui)); 2958 ui[0] = 0; 2959 2960 /* jl: linked list for storing indices of the pivot rows 2961 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2962 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2963 for (i = 0; i < am; i++) { 2964 jl[i] = am; 2965 il[i] = 0; 2966 } 2967 2968 /* create and initialize a linked list for storing column indices of the active row k */ 2969 nlnk = am + 1; 2970 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2971 2972 /* initial FreeSpace size is fill*(ai[am]+1) */ 2973 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2974 current_space = free_space; 2975 2976 for (k = 0; k < am; k++) { /* for each active row k */ 2977 /* initialize lnk by the column indices of row rip[k] of A */ 2978 nzk = 0; 2979 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2980 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2981 ncols_upper = 0; 2982 for (j = 0; j < ncols; j++) { 2983 i = riip[*(aj + ai[rip[k]] + j)]; 2984 if (i >= k) { /* only take upper triangular entry */ 2985 cols[ncols_upper] = i; 2986 ncols_upper++; 2987 } 2988 } 2989 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2990 nzk += nlnk; 2991 2992 /* update lnk by computing fill-in for each pivot row to be merged in */ 2993 prow = jl[k]; /* 1st pivot row */ 2994 2995 while (prow < k) { 2996 nextprow = jl[prow]; 2997 /* merge prow into k-th row */ 2998 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2999 jmax = ui[prow + 1]; 3000 ncols = jmax - jmin; 3001 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3002 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 3003 nzk += nlnk; 3004 3005 /* update il and jl for prow */ 3006 if (jmin < jmax) { 3007 il[prow] = jmin; 3008 j = *uj_ptr; 3009 jl[prow] = jl[j]; 3010 jl[j] = prow; 3011 } 3012 prow = nextprow; 3013 } 3014 3015 /* if free space is not available, make more free space */ 3016 if (current_space->local_remaining < nzk) { 3017 i = am - k + 1; /* num of unfactored rows */ 3018 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3019 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3020 reallocs++; 3021 } 3022 3023 /* copy data into free space, then initialize lnk */ 3024 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3025 3026 /* add the k-th row into il and jl */ 3027 if (nzk - 1 > 0) { 3028 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3029 jl[k] = jl[i]; 3030 jl[i] = k; 3031 il[k] = ui[k] + 1; 3032 } 3033 ui_ptr[k] = current_space->array; 3034 3035 current_space->array += nzk; 3036 current_space->local_used += nzk; 3037 current_space->local_remaining -= nzk; 3038 3039 ui[k + 1] = ui[k] + nzk; 3040 } 3041 3042 #if defined(PETSC_USE_INFO) 3043 if (ai[am] != 0) { 3044 PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 3045 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3046 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3047 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3048 } else { 3049 PetscCall(PetscInfo(A, "Empty matrix\n")); 3050 } 3051 #endif 3052 3053 PetscCall(ISRestoreIndices(perm, &rip)); 3054 PetscCall(ISRestoreIndices(iperm, &riip)); 3055 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3056 3057 /* destroy list of free space and other temporary array(s) */ 3058 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 3059 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3060 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3061 3062 /* put together the new matrix in MATSEQSBAIJ format */ 3063 3064 b = (Mat_SeqSBAIJ *)fact->data; 3065 b->singlemalloc = PETSC_FALSE; 3066 b->free_a = PETSC_TRUE; 3067 b->free_ij = PETSC_TRUE; 3068 3069 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 3070 3071 b->j = uj; 3072 b->i = ui; 3073 b->diag = NULL; 3074 b->ilen = NULL; 3075 b->imax = NULL; 3076 b->row = perm; 3077 b->col = perm; 3078 3079 PetscCall(PetscObjectReference((PetscObject)perm)); 3080 PetscCall(PetscObjectReference((PetscObject)perm)); 3081 3082 b->icol = iperm; 3083 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3084 3085 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3086 b->maxnz = b->nz = ui[am]; 3087 3088 fact->info.factor_mallocs = reallocs; 3089 fact->info.fill_ratio_given = fill; 3090 if (ai[am] != 0) { 3091 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 3092 } else { 3093 fact->info.fill_ratio_needed = 0.0; 3094 } 3095 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3096 PetscFunctionReturn(PETSC_SUCCESS); 3097 } 3098 #endif 3099 3100 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3101 { 3102 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3103 PetscInt n = A->rmap->n; 3104 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3105 PetscScalar *x, sum; 3106 const PetscScalar *b; 3107 const MatScalar *aa = a->a, *v; 3108 PetscInt i, nz; 3109 3110 PetscFunctionBegin; 3111 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3112 3113 PetscCall(VecGetArrayRead(bb, &b)); 3114 PetscCall(VecGetArrayWrite(xx, &x)); 3115 3116 /* forward solve the lower triangular */ 3117 x[0] = b[0]; 3118 v = aa; 3119 vi = aj; 3120 for (i = 1; i < n; i++) { 3121 nz = ai[i + 1] - ai[i]; 3122 sum = b[i]; 3123 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3124 v += nz; 3125 vi += nz; 3126 x[i] = sum; 3127 } 3128 3129 /* backward solve the upper triangular */ 3130 for (i = n - 1; i >= 0; i--) { 3131 v = aa + adiag[i + 1] + 1; 3132 vi = aj + adiag[i + 1] + 1; 3133 nz = adiag[i] - adiag[i + 1] - 1; 3134 sum = x[i]; 3135 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3136 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3137 } 3138 3139 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3140 PetscCall(VecRestoreArrayRead(bb, &b)); 3141 PetscCall(VecRestoreArrayWrite(xx, &x)); 3142 PetscFunctionReturn(PETSC_SUCCESS); 3143 } 3144 3145 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3146 { 3147 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3148 IS iscol = a->col, isrow = a->row; 3149 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3150 const PetscInt *rout, *cout, *r, *c; 3151 PetscScalar *x, *tmp, sum; 3152 const PetscScalar *b; 3153 const MatScalar *aa = a->a, *v; 3154 3155 PetscFunctionBegin; 3156 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3157 3158 PetscCall(VecGetArrayRead(bb, &b)); 3159 PetscCall(VecGetArrayWrite(xx, &x)); 3160 tmp = a->solve_work; 3161 3162 PetscCall(ISGetIndices(isrow, &rout)); 3163 r = rout; 3164 PetscCall(ISGetIndices(iscol, &cout)); 3165 c = cout; 3166 3167 /* forward solve the lower triangular */ 3168 tmp[0] = b[r[0]]; 3169 v = aa; 3170 vi = aj; 3171 for (i = 1; i < n; i++) { 3172 nz = ai[i + 1] - ai[i]; 3173 sum = b[r[i]]; 3174 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3175 tmp[i] = sum; 3176 v += nz; 3177 vi += nz; 3178 } 3179 3180 /* backward solve the upper triangular */ 3181 for (i = n - 1; i >= 0; i--) { 3182 v = aa + adiag[i + 1] + 1; 3183 vi = aj + adiag[i + 1] + 1; 3184 nz = adiag[i] - adiag[i + 1] - 1; 3185 sum = tmp[i]; 3186 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3187 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3188 } 3189 3190 PetscCall(ISRestoreIndices(isrow, &rout)); 3191 PetscCall(ISRestoreIndices(iscol, &cout)); 3192 PetscCall(VecRestoreArrayRead(bb, &b)); 3193 PetscCall(VecRestoreArrayWrite(xx, &x)); 3194 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3195 PetscFunctionReturn(PETSC_SUCCESS); 3196 } 3197 3198 #if 0 3199 // unused 3200 /* 3201 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3202 */ 3203 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3204 { 3205 Mat B = *fact; 3206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3207 IS isicol; 3208 const PetscInt *r, *ic; 3209 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3210 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3211 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3212 PetscInt nlnk, *lnk; 3213 PetscBT lnkbt; 3214 PetscBool row_identity, icol_identity; 3215 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3216 const PetscInt *ics; 3217 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3218 PetscReal dt = info->dt, shift = info->shiftamount; 3219 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3220 PetscBool missing; 3221 3222 PetscFunctionBegin; 3223 if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3224 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3225 3226 /* symbolic factorization, can be reused */ 3227 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3228 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3229 adiag = a->diag; 3230 3231 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3232 3233 /* bdiag is location of diagonal in factor */ 3234 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3235 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3236 3237 /* allocate row pointers bi */ 3238 PetscCall(PetscMalloc1(2 * n + 2, &bi)); 3239 3240 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3241 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3242 nnz_max = ai[n] + 2 * n * dtcount + 2; 3243 3244 PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 3245 PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3246 3247 /* put together the new matrix */ 3248 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3249 b = (Mat_SeqAIJ *)B->data; 3250 3251 b->free_a = PETSC_TRUE; 3252 b->free_ij = PETSC_TRUE; 3253 b->singlemalloc = PETSC_FALSE; 3254 3255 b->a = ba; 3256 b->j = bj; 3257 b->i = bi; 3258 b->diag = bdiag; 3259 b->ilen = NULL; 3260 b->imax = NULL; 3261 b->row = isrow; 3262 b->col = iscol; 3263 PetscCall(PetscObjectReference((PetscObject)isrow)); 3264 PetscCall(PetscObjectReference((PetscObject)iscol)); 3265 b->icol = isicol; 3266 3267 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3268 b->maxnz = nnz_max; 3269 3270 B->factortype = MAT_FACTOR_ILUDT; 3271 B->info.factor_mallocs = 0; 3272 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3273 /* end of symbolic factorization */ 3274 3275 PetscCall(ISGetIndices(isrow, &r)); 3276 PetscCall(ISGetIndices(isicol, &ic)); 3277 ics = ic; 3278 3279 /* linked list for storing column indices of the active row */ 3280 nlnk = n + 1; 3281 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3282 3283 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3284 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3285 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3286 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3287 PetscCall(PetscArrayzero(rtmp, n)); 3288 3289 bi[0] = 0; 3290 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3291 bdiag_rev[n] = bdiag[0]; 3292 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3293 for (i = 0; i < n; i++) { 3294 /* copy initial fill into linked list */ 3295 nzi = ai[r[i] + 1] - ai[r[i]]; 3296 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3297 nzi_al = adiag[r[i]] - ai[r[i]]; 3298 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3299 ajtmp = aj + ai[r[i]]; 3300 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3301 3302 /* load in initial (unfactored row) */ 3303 aatmp = a->a + ai[r[i]]; 3304 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3305 3306 /* add pivot rows into linked list */ 3307 row = lnk[n]; 3308 while (row < i) { 3309 nzi_bl = bi[row + 1] - bi[row] + 1; 3310 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3311 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3312 nzi += nlnk; 3313 row = lnk[row]; 3314 } 3315 3316 /* copy data from lnk into jtmp, then initialize lnk */ 3317 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3318 3319 /* numerical factorization */ 3320 bjtmp = jtmp; 3321 row = *bjtmp++; /* 1st pivot row */ 3322 while (row < i) { 3323 pc = rtmp + row; 3324 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3325 multiplier = (*pc) * (*pv); 3326 *pc = multiplier; 3327 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3328 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3329 pv = ba + bdiag[row + 1] + 1; 3330 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3331 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3332 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3333 } 3334 row = *bjtmp++; 3335 } 3336 3337 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3338 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3339 nzi_bl = 0; 3340 j = 0; 3341 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3342 vtmp[j] = rtmp[jtmp[j]]; 3343 rtmp[jtmp[j]] = 0.0; 3344 nzi_bl++; 3345 j++; 3346 } 3347 nzi_bu = nzi - nzi_bl - 1; 3348 while (j < nzi) { 3349 vtmp[j] = rtmp[jtmp[j]]; 3350 rtmp[jtmp[j]] = 0.0; 3351 j++; 3352 } 3353 3354 bjtmp = bj + bi[i]; 3355 batmp = ba + bi[i]; 3356 /* apply level dropping rule to L part */ 3357 ncut = nzi_al + dtcount; 3358 if (ncut < nzi_bl) { 3359 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3360 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3361 } else { 3362 ncut = nzi_bl; 3363 } 3364 for (j = 0; j < ncut; j++) { 3365 bjtmp[j] = jtmp[j]; 3366 batmp[j] = vtmp[j]; 3367 } 3368 bi[i + 1] = bi[i] + ncut; 3369 nzi = ncut + 1; 3370 3371 /* apply level dropping rule to U part */ 3372 ncut = nzi_au + dtcount; 3373 if (ncut < nzi_bu) { 3374 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3375 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3376 } else { 3377 ncut = nzi_bu; 3378 } 3379 nzi += ncut; 3380 3381 /* mark bdiagonal */ 3382 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3383 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3384 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3385 bjtmp = bj + bdiag[i]; 3386 batmp = ba + bdiag[i]; 3387 *bjtmp = i; 3388 *batmp = diag_tmp; /* rtmp[i]; */ 3389 if (*batmp == 0.0) *batmp = dt + shift; 3390 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3391 3392 bjtmp = bj + bdiag[i + 1] + 1; 3393 batmp = ba + bdiag[i + 1] + 1; 3394 for (k = 0; k < ncut; k++) { 3395 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3396 batmp[k] = vtmp[nzi_bl + 1 + k]; 3397 } 3398 3399 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3400 } /* for (i=0; i<n; i++) */ 3401 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3402 3403 PetscCall(ISRestoreIndices(isrow, &r)); 3404 PetscCall(ISRestoreIndices(isicol, &ic)); 3405 3406 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3407 PetscCall(PetscFree2(im, jtmp)); 3408 PetscCall(PetscFree2(rtmp, vtmp)); 3409 PetscCall(PetscFree(bdiag_rev)); 3410 3411 PetscCall(PetscLogFlops(B->cmap->n)); 3412 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3413 3414 PetscCall(ISIdentity(isrow, &row_identity)); 3415 PetscCall(ISIdentity(isicol, &icol_identity)); 3416 if (row_identity && icol_identity) { 3417 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3418 } else { 3419 B->ops->solve = MatSolve_SeqAIJ; 3420 } 3421 3422 B->ops->solveadd = NULL; 3423 B->ops->solvetranspose = NULL; 3424 B->ops->solvetransposeadd = NULL; 3425 B->ops->matsolve = NULL; 3426 B->assembled = PETSC_TRUE; 3427 B->preallocated = PETSC_TRUE; 3428 PetscFunctionReturn(PETSC_SUCCESS); 3429 } 3430 3431 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3432 /* 3433 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3434 */ 3435 3436 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3437 { 3438 PetscFunctionBegin; 3439 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3440 PetscFunctionReturn(PETSC_SUCCESS); 3441 } 3442 3443 /* 3444 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3445 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3446 */ 3447 /* 3448 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3449 */ 3450 3451 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3452 { 3453 Mat C = fact; 3454 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3455 IS isrow = b->row, isicol = b->icol; 3456 const PetscInt *r, *ic, *ics; 3457 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3458 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3459 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3460 PetscReal dt = info->dt, shift = info->shiftamount; 3461 PetscBool row_identity, col_identity; 3462 3463 PetscFunctionBegin; 3464 PetscCall(ISGetIndices(isrow, &r)); 3465 PetscCall(ISGetIndices(isicol, &ic)); 3466 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3467 ics = ic; 3468 3469 for (i = 0; i < n; i++) { 3470 /* initialize rtmp array */ 3471 nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 3472 bjtmp = bj + bi[i]; 3473 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3474 rtmp[i] = 0.0; 3475 nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3476 bjtmp = bj + bdiag[i + 1] + 1; 3477 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3478 3479 /* load in initial unfactored row of A */ 3480 nz = ai[r[i] + 1] - ai[r[i]]; 3481 ajtmp = aj + ai[r[i]]; 3482 v = aa + ai[r[i]]; 3483 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3484 3485 /* numerical factorization */ 3486 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3487 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3488 k = 0; 3489 while (k < nzl) { 3490 row = *bjtmp++; 3491 pc = rtmp + row; 3492 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3493 multiplier = (*pc) * (*pv); 3494 *pc = multiplier; 3495 if (PetscAbsScalar(multiplier) > dt) { 3496 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3497 pv = b->a + bdiag[row + 1] + 1; 3498 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3499 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3500 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3501 } 3502 k++; 3503 } 3504 3505 /* finished row so stick it into b->a */ 3506 /* L-part */ 3507 pv = b->a + bi[i]; 3508 pj = bj + bi[i]; 3509 nzl = bi[i + 1] - bi[i]; 3510 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3511 3512 /* diagonal: invert diagonal entries for simpler triangular solves */ 3513 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3514 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3515 3516 /* U-part */ 3517 pv = b->a + bdiag[i + 1] + 1; 3518 pj = bj + bdiag[i + 1] + 1; 3519 nzu = bdiag[i] - bdiag[i + 1] - 1; 3520 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3521 } 3522 3523 PetscCall(PetscFree(rtmp)); 3524 PetscCall(ISRestoreIndices(isicol, &ic)); 3525 PetscCall(ISRestoreIndices(isrow, &r)); 3526 3527 PetscCall(ISIdentity(isrow, &row_identity)); 3528 PetscCall(ISIdentity(isicol, &col_identity)); 3529 if (row_identity && col_identity) { 3530 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3531 } else { 3532 C->ops->solve = MatSolve_SeqAIJ; 3533 } 3534 C->ops->solveadd = NULL; 3535 C->ops->solvetranspose = NULL; 3536 C->ops->solvetransposeadd = NULL; 3537 C->ops->matsolve = NULL; 3538 C->assembled = PETSC_TRUE; 3539 C->preallocated = PETSC_TRUE; 3540 3541 PetscCall(PetscLogFlops(C->cmap->n)); 3542 PetscFunctionReturn(PETSC_SUCCESS); 3543 } 3544 #endif 3545