1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) { 13 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 14 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 15 const PetscInt *ai = a->i, *aj = a->j; 16 const PetscScalar *aa = a->a; 17 PetscBool *done; 18 PetscReal best, past = 0, future; 19 20 PetscFunctionBegin; 21 /* pick initial row */ 22 best = -1; 23 for (i = 0; i < n; i++) { 24 future = 0.0; 25 for (j = ai[i]; j < ai[i + 1]; j++) { 26 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 27 else past = PetscAbsScalar(aa[j]); 28 } 29 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 30 if (past / future > best) { 31 best = past / future; 32 current = i; 33 } 34 } 35 36 PetscCall(PetscMalloc1(n, &done)); 37 PetscCall(PetscArrayzero(done, n)); 38 PetscCall(PetscMalloc1(n, &order)); 39 order[0] = current; 40 for (i = 0; i < n - 1; i++) { 41 done[current] = PETSC_TRUE; 42 best = -1; 43 /* loop over all neighbors of current pivot */ 44 for (j = ai[current]; j < ai[current + 1]; j++) { 45 jj = aj[j]; 46 if (done[jj]) continue; 47 /* loop over columns of potential next row computing weights for below and above diagonal */ 48 past = future = 0.0; 49 for (k = ai[jj]; k < ai[jj + 1]; k++) { 50 kk = aj[k]; 51 if (done[kk]) past += PetscAbsScalar(aa[k]); 52 else if (kk != jj) future += PetscAbsScalar(aa[k]); 53 } 54 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 55 if (past / future > best) { 56 best = past / future; 57 newcurrent = jj; 58 } 59 } 60 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 61 best = -1; 62 for (k = 0; k < n; k++) { 63 if (done[k]) continue; 64 future = 0.0; 65 past = 0.0; 66 for (j = ai[k]; j < ai[k + 1]; j++) { 67 kk = aj[j]; 68 if (done[kk]) past += PetscAbsScalar(aa[j]); 69 else if (kk != k) future += PetscAbsScalar(aa[j]); 70 } 71 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 72 if (past / future > best) { 73 best = past / future; 74 newcurrent = k; 75 } 76 } 77 } 78 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 79 current = newcurrent; 80 order[i + 1] = current; 81 } 82 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 83 *icol = *irow; 84 PetscCall(PetscObjectReference((PetscObject)*irow)); 85 PetscCall(PetscFree(done)); 86 PetscCall(PetscFree(order)); 87 PetscFunctionReturn(0); 88 } 89 90 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) { 91 PetscFunctionBegin; 92 *type = MATSOLVERPETSC; 93 PetscFunctionReturn(0); 94 } 95 96 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) { 97 PetscInt n = A->rmap->n; 98 99 PetscFunctionBegin; 100 #if defined(PETSC_USE_COMPLEX) 101 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported"); 102 #endif 103 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 104 PetscCall(MatSetSizes(*B, n, n, n, n)); 105 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 106 PetscCall(MatSetType(*B, MATSEQAIJ)); 107 108 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 109 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 110 111 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 112 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 113 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 114 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 115 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 116 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 117 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 118 119 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 120 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 121 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 122 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 123 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 124 (*B)->factortype = ftype; 125 126 PetscCall(PetscFree((*B)->solvertype)); 127 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 128 (*B)->canuseordering = PETSC_TRUE; 129 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 134 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 135 IS isicol; 136 const PetscInt *r, *ic; 137 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 138 PetscInt *bi, *bj, *ajtmp; 139 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 140 PetscReal f; 141 PetscInt nlnk, *lnk, k, **bi_ptr; 142 PetscFreeSpaceList free_space = NULL, current_space = NULL; 143 PetscBT lnkbt; 144 PetscBool missing; 145 146 PetscFunctionBegin; 147 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 148 PetscCall(MatMissingDiagonal(A, &missing, &i)); 149 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 150 151 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 152 PetscCall(ISGetIndices(isrow, &r)); 153 PetscCall(ISGetIndices(isicol, &ic)); 154 155 /* get new row pointers */ 156 PetscCall(PetscMalloc1(n + 1, &bi)); 157 bi[0] = 0; 158 159 /* bdiag is location of diagonal in factor */ 160 PetscCall(PetscMalloc1(n + 1, &bdiag)); 161 bdiag[0] = 0; 162 163 /* linked list for storing column indices of the active row */ 164 nlnk = n + 1; 165 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 166 167 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 168 169 /* initial FreeSpace size is f*(ai[n]+1) */ 170 f = info->fill; 171 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 172 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 173 current_space = free_space; 174 175 for (i = 0; i < n; i++) { 176 /* copy previous fill into linked list */ 177 nzi = 0; 178 nnz = ai[r[i] + 1] - ai[r[i]]; 179 ajtmp = aj + ai[r[i]]; 180 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 181 nzi += nlnk; 182 183 /* add pivot rows into linked list */ 184 row = lnk[n]; 185 while (row < i) { 186 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 187 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 188 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 189 nzi += nlnk; 190 row = lnk[row]; 191 } 192 bi[i + 1] = bi[i] + nzi; 193 im[i] = nzi; 194 195 /* mark bdiag */ 196 nzbd = 0; 197 nnz = nzi; 198 k = lnk[n]; 199 while (nnz-- && k < i) { 200 nzbd++; 201 k = lnk[k]; 202 } 203 bdiag[i] = bi[i] + nzbd; 204 205 /* if free space is not available, make more free space */ 206 if (current_space->local_remaining < nzi) { 207 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 208 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 209 reallocs++; 210 } 211 212 /* copy data into free space, then initialize lnk */ 213 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 214 215 bi_ptr[i] = current_space->array; 216 current_space->array += nzi; 217 current_space->local_used += nzi; 218 current_space->local_remaining -= nzi; 219 } 220 #if defined(PETSC_USE_INFO) 221 if (ai[n] != 0) { 222 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 223 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 224 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 225 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 226 PetscCall(PetscInfo(A, "for best performance.\n")); 227 } else { 228 PetscCall(PetscInfo(A, "Empty matrix\n")); 229 } 230 #endif 231 232 PetscCall(ISRestoreIndices(isrow, &r)); 233 PetscCall(ISRestoreIndices(isicol, &ic)); 234 235 /* destroy list of free space and other temporary array(s) */ 236 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 237 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 238 PetscCall(PetscLLDestroy(lnk, lnkbt)); 239 PetscCall(PetscFree2(bi_ptr, im)); 240 241 /* put together the new matrix */ 242 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 243 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 244 b = (Mat_SeqAIJ *)(B)->data; 245 246 b->free_a = PETSC_TRUE; 247 b->free_ij = PETSC_TRUE; 248 b->singlemalloc = PETSC_FALSE; 249 250 PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 251 b->j = bj; 252 b->i = bi; 253 b->diag = bdiag; 254 b->ilen = NULL; 255 b->imax = NULL; 256 b->row = isrow; 257 b->col = iscol; 258 PetscCall(PetscObjectReference((PetscObject)isrow)); 259 PetscCall(PetscObjectReference((PetscObject)iscol)); 260 b->icol = isicol; 261 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 262 263 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 264 PetscCall(PetscLogObjectMemory((PetscObject)B, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 265 b->maxnz = b->nz = bi[n]; 266 267 (B)->factortype = MAT_FACTOR_LU; 268 (B)->info.factor_mallocs = reallocs; 269 (B)->info.fill_ratio_given = f; 270 271 if (ai[n]) { 272 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 273 } else { 274 (B)->info.fill_ratio_needed = 0.0; 275 } 276 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 277 if (a->inode.size) { (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; } 278 PetscFunctionReturn(0); 279 } 280 281 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 282 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 283 IS isicol; 284 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 285 PetscInt i, n = A->rmap->n; 286 PetscInt *bi, *bj; 287 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 288 PetscReal f; 289 PetscInt nlnk, *lnk, k, **bi_ptr; 290 PetscFreeSpaceList free_space = NULL, current_space = NULL; 291 PetscBT lnkbt; 292 PetscBool missing; 293 294 PetscFunctionBegin; 295 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 296 PetscCall(MatMissingDiagonal(A, &missing, &i)); 297 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 298 299 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 300 PetscCall(ISGetIndices(isrow, &r)); 301 PetscCall(ISGetIndices(isicol, &ic)); 302 303 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 304 PetscCall(PetscMalloc1(n + 1, &bi)); 305 PetscCall(PetscMalloc1(n + 1, &bdiag)); 306 bi[0] = bdiag[0] = 0; 307 308 /* linked list for storing column indices of the active row */ 309 nlnk = n + 1; 310 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 311 312 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 313 314 /* initial FreeSpace size is f*(ai[n]+1) */ 315 f = info->fill; 316 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 317 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 318 current_space = free_space; 319 320 for (i = 0; i < n; i++) { 321 /* copy previous fill into linked list */ 322 nzi = 0; 323 nnz = ai[r[i] + 1] - ai[r[i]]; 324 ajtmp = aj + ai[r[i]]; 325 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 326 nzi += nlnk; 327 328 /* add pivot rows into linked list */ 329 row = lnk[n]; 330 while (row < i) { 331 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 332 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 333 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 334 nzi += nlnk; 335 row = lnk[row]; 336 } 337 bi[i + 1] = bi[i] + nzi; 338 im[i] = nzi; 339 340 /* mark bdiag */ 341 nzbd = 0; 342 nnz = nzi; 343 k = lnk[n]; 344 while (nnz-- && k < i) { 345 nzbd++; 346 k = lnk[k]; 347 } 348 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 349 350 /* if free space is not available, make more free space */ 351 if (current_space->local_remaining < nzi) { 352 /* estimated additional space needed */ 353 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 354 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 355 reallocs++; 356 } 357 358 /* copy data into free space, then initialize lnk */ 359 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 360 361 bi_ptr[i] = current_space->array; 362 current_space->array += nzi; 363 current_space->local_used += nzi; 364 current_space->local_remaining -= nzi; 365 } 366 367 PetscCall(ISRestoreIndices(isrow, &r)); 368 PetscCall(ISRestoreIndices(isicol, &ic)); 369 370 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 371 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 372 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 373 PetscCall(PetscLLDestroy(lnk, lnkbt)); 374 PetscCall(PetscFree2(bi_ptr, im)); 375 376 /* put together the new matrix */ 377 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 378 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 379 b = (Mat_SeqAIJ *)(B)->data; 380 381 b->free_a = PETSC_TRUE; 382 b->free_ij = PETSC_TRUE; 383 b->singlemalloc = PETSC_FALSE; 384 385 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 386 387 b->j = bj; 388 b->i = bi; 389 b->diag = bdiag; 390 b->ilen = NULL; 391 b->imax = NULL; 392 b->row = isrow; 393 b->col = iscol; 394 PetscCall(PetscObjectReference((PetscObject)isrow)); 395 PetscCall(PetscObjectReference((PetscObject)iscol)); 396 b->icol = isicol; 397 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 398 399 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 400 PetscCall(PetscLogObjectMemory((PetscObject)B, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 401 b->maxnz = b->nz = bdiag[0] + 1; 402 403 B->factortype = MAT_FACTOR_LU; 404 B->info.factor_mallocs = reallocs; 405 B->info.fill_ratio_given = f; 406 407 if (ai[n]) { 408 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 409 } else { 410 B->info.fill_ratio_needed = 0.0; 411 } 412 #if defined(PETSC_USE_INFO) 413 if (ai[n] != 0) { 414 PetscReal af = B->info.fill_ratio_needed; 415 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 416 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 417 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 418 PetscCall(PetscInfo(A, "for best performance.\n")); 419 } else { 420 PetscCall(PetscInfo(A, "Empty matrix\n")); 421 } 422 #endif 423 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 424 if (a->inode.size) { B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; } 425 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 426 PetscFunctionReturn(0); 427 } 428 429 /* 430 Trouble in factorization, should we dump the original matrix? 431 */ 432 PetscErrorCode MatFactorDumpMatrix(Mat A) { 433 PetscBool flg = PETSC_FALSE; 434 435 PetscFunctionBegin; 436 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 437 if (flg) { 438 PetscViewer viewer; 439 char filename[PETSC_MAX_PATH_LEN]; 440 441 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 442 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 443 PetscCall(MatView(A, viewer)); 444 PetscCall(PetscViewerDestroy(&viewer)); 445 } 446 PetscFunctionReturn(0); 447 } 448 449 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 450 Mat C = B; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 452 IS isrow = b->row, isicol = b->icol; 453 const PetscInt *r, *ic, *ics; 454 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 455 PetscInt i, j, k, nz, nzL, row, *pj; 456 const PetscInt *ajtmp, *bjtmp; 457 MatScalar *rtmp, *pc, multiplier, *pv; 458 const MatScalar *aa = a->a, *v; 459 PetscBool row_identity, col_identity; 460 FactorShiftCtx sctx; 461 const PetscInt *ddiag; 462 PetscReal rs; 463 MatScalar d; 464 465 PetscFunctionBegin; 466 /* MatPivotSetUp(): initialize shift context sctx */ 467 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 468 469 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 470 ddiag = a->diag; 471 sctx.shift_top = info->zeropivot; 472 for (i = 0; i < n; i++) { 473 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 474 d = (aa)[ddiag[i]]; 475 rs = -PetscAbsScalar(d) - PetscRealPart(d); 476 v = aa + ai[i]; 477 nz = ai[i + 1] - ai[i]; 478 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 479 if (rs > sctx.shift_top) sctx.shift_top = rs; 480 } 481 sctx.shift_top *= 1.1; 482 sctx.nshift_max = 5; 483 sctx.shift_lo = 0.; 484 sctx.shift_hi = 1.; 485 } 486 487 PetscCall(ISGetIndices(isrow, &r)); 488 PetscCall(ISGetIndices(isicol, &ic)); 489 PetscCall(PetscMalloc1(n + 1, &rtmp)); 490 ics = ic; 491 492 do { 493 sctx.newshift = PETSC_FALSE; 494 for (i = 0; i < n; i++) { 495 /* zero rtmp */ 496 /* L part */ 497 nz = bi[i + 1] - bi[i]; 498 bjtmp = bj + bi[i]; 499 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 500 501 /* U part */ 502 nz = bdiag[i] - bdiag[i + 1]; 503 bjtmp = bj + bdiag[i + 1] + 1; 504 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 505 506 /* load in initial (unfactored row) */ 507 nz = ai[r[i] + 1] - ai[r[i]]; 508 ajtmp = aj + ai[r[i]]; 509 v = aa + ai[r[i]]; 510 for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; } 511 /* ZeropivotApply() */ 512 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 513 514 /* elimination */ 515 bjtmp = bj + bi[i]; 516 row = *bjtmp++; 517 nzL = bi[i + 1] - bi[i]; 518 for (k = 0; k < nzL; k++) { 519 pc = rtmp + row; 520 if (*pc != 0.0) { 521 pv = b->a + bdiag[row]; 522 multiplier = *pc * (*pv); 523 *pc = multiplier; 524 525 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 526 pv = b->a + bdiag[row + 1] + 1; 527 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 528 529 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 530 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 531 } 532 row = *bjtmp++; 533 } 534 535 /* finished row so stick it into b->a */ 536 rs = 0.0; 537 /* L part */ 538 pv = b->a + bi[i]; 539 pj = b->j + bi[i]; 540 nz = bi[i + 1] - bi[i]; 541 for (j = 0; j < nz; j++) { 542 pv[j] = rtmp[pj[j]]; 543 rs += PetscAbsScalar(pv[j]); 544 } 545 546 /* U part */ 547 pv = b->a + bdiag[i + 1] + 1; 548 pj = b->j + bdiag[i + 1] + 1; 549 nz = bdiag[i] - bdiag[i + 1] - 1; 550 for (j = 0; j < nz; j++) { 551 pv[j] = rtmp[pj[j]]; 552 rs += PetscAbsScalar(pv[j]); 553 } 554 555 sctx.rs = rs; 556 sctx.pv = rtmp[i]; 557 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 558 if (sctx.newshift) break; /* break for-loop */ 559 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 560 561 /* Mark diagonal and invert diagonal for simpler triangular solves */ 562 pv = b->a + bdiag[i]; 563 *pv = 1.0 / rtmp[i]; 564 565 } /* endof for (i=0; i<n; i++) { */ 566 567 /* MatPivotRefine() */ 568 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 569 /* 570 * if no shift in this attempt & shifting & started shifting & can refine, 571 * then try lower shift 572 */ 573 sctx.shift_hi = sctx.shift_fraction; 574 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 575 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 576 sctx.newshift = PETSC_TRUE; 577 sctx.nshift++; 578 } 579 } while (sctx.newshift); 580 581 PetscCall(PetscFree(rtmp)); 582 PetscCall(ISRestoreIndices(isicol, &ic)); 583 PetscCall(ISRestoreIndices(isrow, &r)); 584 585 PetscCall(ISIdentity(isrow, &row_identity)); 586 PetscCall(ISIdentity(isicol, &col_identity)); 587 if (b->inode.size) { 588 C->ops->solve = MatSolve_SeqAIJ_Inode; 589 } else if (row_identity && col_identity) { 590 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 591 } else { 592 C->ops->solve = MatSolve_SeqAIJ; 593 } 594 C->ops->solveadd = MatSolveAdd_SeqAIJ; 595 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 596 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 597 C->ops->matsolve = MatMatSolve_SeqAIJ; 598 C->assembled = PETSC_TRUE; 599 C->preallocated = PETSC_TRUE; 600 601 PetscCall(PetscLogFlops(C->cmap->n)); 602 603 /* MatShiftView(A,info,&sctx) */ 604 if (sctx.nshift) { 605 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 606 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 607 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 608 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 609 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 610 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 611 } 612 } 613 PetscFunctionReturn(0); 614 } 615 616 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 617 Mat C = B; 618 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 619 IS isrow = b->row, isicol = b->icol; 620 const PetscInt *r, *ic, *ics; 621 PetscInt nz, row, i, j, n = A->rmap->n, diag; 622 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 623 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 624 MatScalar *pv, *rtmp, *pc, multiplier, d; 625 const MatScalar *v, *aa = a->a; 626 PetscReal rs = 0.0; 627 FactorShiftCtx sctx; 628 const PetscInt *ddiag; 629 PetscBool row_identity, col_identity; 630 631 PetscFunctionBegin; 632 /* MatPivotSetUp(): initialize shift context sctx */ 633 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 634 635 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 636 ddiag = a->diag; 637 sctx.shift_top = info->zeropivot; 638 for (i = 0; i < n; i++) { 639 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 640 d = (aa)[ddiag[i]]; 641 rs = -PetscAbsScalar(d) - PetscRealPart(d); 642 v = aa + ai[i]; 643 nz = ai[i + 1] - ai[i]; 644 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 645 if (rs > sctx.shift_top) sctx.shift_top = rs; 646 } 647 sctx.shift_top *= 1.1; 648 sctx.nshift_max = 5; 649 sctx.shift_lo = 0.; 650 sctx.shift_hi = 1.; 651 } 652 653 PetscCall(ISGetIndices(isrow, &r)); 654 PetscCall(ISGetIndices(isicol, &ic)); 655 PetscCall(PetscMalloc1(n + 1, &rtmp)); 656 ics = ic; 657 658 do { 659 sctx.newshift = PETSC_FALSE; 660 for (i = 0; i < n; i++) { 661 nz = bi[i + 1] - bi[i]; 662 bjtmp = bj + bi[i]; 663 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 664 665 /* load in initial (unfactored row) */ 666 nz = ai[r[i] + 1] - ai[r[i]]; 667 ajtmp = aj + ai[r[i]]; 668 v = aa + ai[r[i]]; 669 for (j = 0; j < nz; j++) { rtmp[ics[ajtmp[j]]] = v[j]; } 670 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 671 672 row = *bjtmp++; 673 while (row < i) { 674 pc = rtmp + row; 675 if (*pc != 0.0) { 676 pv = b->a + diag_offset[row]; 677 pj = b->j + diag_offset[row] + 1; 678 multiplier = *pc / *pv++; 679 *pc = multiplier; 680 nz = bi[row + 1] - diag_offset[row] - 1; 681 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 682 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 683 } 684 row = *bjtmp++; 685 } 686 /* finished row so stick it into b->a */ 687 pv = b->a + bi[i]; 688 pj = b->j + bi[i]; 689 nz = bi[i + 1] - bi[i]; 690 diag = diag_offset[i] - bi[i]; 691 rs = 0.0; 692 for (j = 0; j < nz; j++) { 693 pv[j] = rtmp[pj[j]]; 694 rs += PetscAbsScalar(pv[j]); 695 } 696 rs -= PetscAbsScalar(pv[diag]); 697 698 sctx.rs = rs; 699 sctx.pv = pv[diag]; 700 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 701 if (sctx.newshift) break; 702 pv[diag] = sctx.pv; 703 } 704 705 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 706 /* 707 * if no shift in this attempt & shifting & started shifting & can refine, 708 * then try lower shift 709 */ 710 sctx.shift_hi = sctx.shift_fraction; 711 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 712 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 713 sctx.newshift = PETSC_TRUE; 714 sctx.nshift++; 715 } 716 } while (sctx.newshift); 717 718 /* invert diagonal entries for simpler triangular solves */ 719 for (i = 0; i < n; i++) { b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; } 720 PetscCall(PetscFree(rtmp)); 721 PetscCall(ISRestoreIndices(isicol, &ic)); 722 PetscCall(ISRestoreIndices(isrow, &r)); 723 724 PetscCall(ISIdentity(isrow, &row_identity)); 725 PetscCall(ISIdentity(isicol, &col_identity)); 726 if (row_identity && col_identity) { 727 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 728 } else { 729 C->ops->solve = MatSolve_SeqAIJ_inplace; 730 } 731 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 732 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 733 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 734 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 735 736 C->assembled = PETSC_TRUE; 737 C->preallocated = PETSC_TRUE; 738 739 PetscCall(PetscLogFlops(C->cmap->n)); 740 if (sctx.nshift) { 741 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 742 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 743 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 744 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 745 } 746 } 747 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 748 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 749 750 PetscCall(MatSeqAIJCheckInode(C)); 751 PetscFunctionReturn(0); 752 } 753 754 /* 755 This routine implements inplace ILU(0) with row or/and column permutations. 756 Input: 757 A - original matrix 758 Output; 759 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 760 a->j (col index) is permuted by the inverse of colperm, then sorted 761 a->a reordered accordingly with a->j 762 a->diag (ptr to diagonal elements) is updated. 763 */ 764 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) { 765 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 766 IS isrow = a->row, isicol = a->icol; 767 const PetscInt *r, *ic, *ics; 768 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 769 PetscInt *ajtmp, nz, row; 770 PetscInt *diag = a->diag, nbdiag, *pj; 771 PetscScalar *rtmp, *pc, multiplier, d; 772 MatScalar *pv, *v; 773 PetscReal rs; 774 FactorShiftCtx sctx; 775 const MatScalar *aa = a->a, *vtmp; 776 777 PetscFunctionBegin; 778 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 779 780 /* MatPivotSetUp(): initialize shift context sctx */ 781 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 782 783 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 784 const PetscInt *ddiag = a->diag; 785 sctx.shift_top = info->zeropivot; 786 for (i = 0; i < n; i++) { 787 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 788 d = (aa)[ddiag[i]]; 789 rs = -PetscAbsScalar(d) - PetscRealPart(d); 790 vtmp = aa + ai[i]; 791 nz = ai[i + 1] - ai[i]; 792 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 793 if (rs > sctx.shift_top) sctx.shift_top = rs; 794 } 795 sctx.shift_top *= 1.1; 796 sctx.nshift_max = 5; 797 sctx.shift_lo = 0.; 798 sctx.shift_hi = 1.; 799 } 800 801 PetscCall(ISGetIndices(isrow, &r)); 802 PetscCall(ISGetIndices(isicol, &ic)); 803 PetscCall(PetscMalloc1(n + 1, &rtmp)); 804 PetscCall(PetscArrayzero(rtmp, n + 1)); 805 ics = ic; 806 807 #if defined(MV) 808 sctx.shift_top = 0.; 809 sctx.nshift_max = 0; 810 sctx.shift_lo = 0.; 811 sctx.shift_hi = 0.; 812 sctx.shift_fraction = 0.; 813 814 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 815 sctx.shift_top = 0.; 816 for (i = 0; i < n; i++) { 817 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 818 d = (a->a)[diag[i]]; 819 rs = -PetscAbsScalar(d) - PetscRealPart(d); 820 v = a->a + ai[i]; 821 nz = ai[i + 1] - ai[i]; 822 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 823 if (rs > sctx.shift_top) sctx.shift_top = rs; 824 } 825 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 826 sctx.shift_top *= 1.1; 827 sctx.nshift_max = 5; 828 sctx.shift_lo = 0.; 829 sctx.shift_hi = 1.; 830 } 831 832 sctx.shift_amount = 0.; 833 sctx.nshift = 0; 834 #endif 835 836 do { 837 sctx.newshift = PETSC_FALSE; 838 for (i = 0; i < n; i++) { 839 /* load in initial unfactored row */ 840 nz = ai[r[i] + 1] - ai[r[i]]; 841 ajtmp = aj + ai[r[i]]; 842 v = a->a + ai[r[i]]; 843 /* sort permuted ajtmp and values v accordingly */ 844 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 845 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 846 847 diag[r[i]] = ai[r[i]]; 848 for (j = 0; j < nz; j++) { 849 rtmp[ajtmp[j]] = v[j]; 850 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 851 } 852 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 853 854 row = *ajtmp++; 855 while (row < i) { 856 pc = rtmp + row; 857 if (*pc != 0.0) { 858 pv = a->a + diag[r[row]]; 859 pj = aj + diag[r[row]] + 1; 860 861 multiplier = *pc / *pv++; 862 *pc = multiplier; 863 nz = ai[r[row] + 1] - diag[r[row]] - 1; 864 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 865 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 866 } 867 row = *ajtmp++; 868 } 869 /* finished row so overwrite it onto a->a */ 870 pv = a->a + ai[r[i]]; 871 pj = aj + ai[r[i]]; 872 nz = ai[r[i] + 1] - ai[r[i]]; 873 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 874 875 rs = 0.0; 876 for (j = 0; j < nz; j++) { 877 pv[j] = rtmp[pj[j]]; 878 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 879 } 880 881 sctx.rs = rs; 882 sctx.pv = pv[nbdiag]; 883 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 884 if (sctx.newshift) break; 885 pv[nbdiag] = sctx.pv; 886 } 887 888 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 889 /* 890 * if no shift in this attempt & shifting & started shifting & can refine, 891 * then try lower shift 892 */ 893 sctx.shift_hi = sctx.shift_fraction; 894 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 895 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 896 sctx.newshift = PETSC_TRUE; 897 sctx.nshift++; 898 } 899 } while (sctx.newshift); 900 901 /* invert diagonal entries for simpler triangular solves */ 902 for (i = 0; i < n; i++) { a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; } 903 904 PetscCall(PetscFree(rtmp)); 905 PetscCall(ISRestoreIndices(isicol, &ic)); 906 PetscCall(ISRestoreIndices(isrow, &r)); 907 908 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 909 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 910 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 911 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 912 913 A->assembled = PETSC_TRUE; 914 A->preallocated = PETSC_TRUE; 915 916 PetscCall(PetscLogFlops(A->cmap->n)); 917 if (sctx.nshift) { 918 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 919 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 920 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 921 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 922 } 923 } 924 PetscFunctionReturn(0); 925 } 926 927 /* ----------------------------------------------------------- */ 928 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) { 929 Mat C; 930 931 PetscFunctionBegin; 932 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 933 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 934 PetscCall(MatLUFactorNumeric(C, A, info)); 935 936 A->ops->solve = C->ops->solve; 937 A->ops->solvetranspose = C->ops->solvetranspose; 938 939 PetscCall(MatHeaderMerge(A, &C)); 940 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)((Mat_SeqAIJ *)(A->data))->icol)); 941 PetscFunctionReturn(0); 942 } 943 /* ----------------------------------------------------------- */ 944 945 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 946 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 947 IS iscol = a->col, isrow = a->row; 948 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 949 PetscInt nz; 950 const PetscInt *rout, *cout, *r, *c; 951 PetscScalar *x, *tmp, *tmps, sum; 952 const PetscScalar *b; 953 const MatScalar *aa = a->a, *v; 954 955 PetscFunctionBegin; 956 if (!n) PetscFunctionReturn(0); 957 958 PetscCall(VecGetArrayRead(bb, &b)); 959 PetscCall(VecGetArrayWrite(xx, &x)); 960 tmp = a->solve_work; 961 962 PetscCall(ISGetIndices(isrow, &rout)); 963 r = rout; 964 PetscCall(ISGetIndices(iscol, &cout)); 965 c = cout + (n - 1); 966 967 /* forward solve the lower triangular */ 968 tmp[0] = b[*r++]; 969 tmps = tmp; 970 for (i = 1; i < n; i++) { 971 v = aa + ai[i]; 972 vi = aj + ai[i]; 973 nz = a->diag[i] - ai[i]; 974 sum = b[*r++]; 975 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 976 tmp[i] = sum; 977 } 978 979 /* backward solve the upper triangular */ 980 for (i = n - 1; i >= 0; i--) { 981 v = aa + a->diag[i] + 1; 982 vi = aj + a->diag[i] + 1; 983 nz = ai[i + 1] - a->diag[i] - 1; 984 sum = tmp[i]; 985 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 986 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 987 } 988 989 PetscCall(ISRestoreIndices(isrow, &rout)); 990 PetscCall(ISRestoreIndices(iscol, &cout)); 991 PetscCall(VecRestoreArrayRead(bb, &b)); 992 PetscCall(VecRestoreArrayWrite(xx, &x)); 993 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) { 998 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 999 IS iscol = a->col, isrow = a->row; 1000 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1001 PetscInt nz, neq, ldb, ldx; 1002 const PetscInt *rout, *cout, *r, *c; 1003 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1004 const PetscScalar *b, *aa = a->a, *v; 1005 PetscBool isdense; 1006 1007 PetscFunctionBegin; 1008 if (!n) PetscFunctionReturn(0); 1009 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1010 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1011 if (X != B) { 1012 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1013 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1014 } 1015 PetscCall(MatDenseGetArrayRead(B, &b)); 1016 PetscCall(MatDenseGetLDA(B, &ldb)); 1017 PetscCall(MatDenseGetArray(X, &x)); 1018 PetscCall(MatDenseGetLDA(X, &ldx)); 1019 PetscCall(ISGetIndices(isrow, &rout)); 1020 r = rout; 1021 PetscCall(ISGetIndices(iscol, &cout)); 1022 c = cout; 1023 for (neq = 0; neq < B->cmap->n; neq++) { 1024 /* forward solve the lower triangular */ 1025 tmp[0] = b[r[0]]; 1026 tmps = tmp; 1027 for (i = 1; i < n; i++) { 1028 v = aa + ai[i]; 1029 vi = aj + ai[i]; 1030 nz = a->diag[i] - ai[i]; 1031 sum = b[r[i]]; 1032 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1033 tmp[i] = sum; 1034 } 1035 /* backward solve the upper triangular */ 1036 for (i = n - 1; i >= 0; i--) { 1037 v = aa + a->diag[i] + 1; 1038 vi = aj + a->diag[i] + 1; 1039 nz = ai[i + 1] - a->diag[i] - 1; 1040 sum = tmp[i]; 1041 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1042 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1043 } 1044 b += ldb; 1045 x += ldx; 1046 } 1047 PetscCall(ISRestoreIndices(isrow, &rout)); 1048 PetscCall(ISRestoreIndices(iscol, &cout)); 1049 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1050 PetscCall(MatDenseRestoreArray(X, &x)); 1051 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) { 1056 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1057 IS iscol = a->col, isrow = a->row; 1058 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1059 PetscInt nz, neq, ldb, ldx; 1060 const PetscInt *rout, *cout, *r, *c; 1061 PetscScalar *x, *tmp = a->solve_work, sum; 1062 const PetscScalar *b, *aa = a->a, *v; 1063 PetscBool isdense; 1064 1065 PetscFunctionBegin; 1066 if (!n) PetscFunctionReturn(0); 1067 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1068 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1069 if (X != B) { 1070 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1071 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1072 } 1073 PetscCall(MatDenseGetArrayRead(B, &b)); 1074 PetscCall(MatDenseGetLDA(B, &ldb)); 1075 PetscCall(MatDenseGetArray(X, &x)); 1076 PetscCall(MatDenseGetLDA(X, &ldx)); 1077 PetscCall(ISGetIndices(isrow, &rout)); 1078 r = rout; 1079 PetscCall(ISGetIndices(iscol, &cout)); 1080 c = cout; 1081 for (neq = 0; neq < B->cmap->n; neq++) { 1082 /* forward solve the lower triangular */ 1083 tmp[0] = b[r[0]]; 1084 v = aa; 1085 vi = aj; 1086 for (i = 1; i < n; i++) { 1087 nz = ai[i + 1] - ai[i]; 1088 sum = b[r[i]]; 1089 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1090 tmp[i] = sum; 1091 v += nz; 1092 vi += nz; 1093 } 1094 /* backward solve the upper triangular */ 1095 for (i = n - 1; i >= 0; i--) { 1096 v = aa + adiag[i + 1] + 1; 1097 vi = aj + adiag[i + 1] + 1; 1098 nz = adiag[i] - adiag[i + 1] - 1; 1099 sum = tmp[i]; 1100 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1101 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1102 } 1103 b += ldb; 1104 x += ldx; 1105 } 1106 PetscCall(ISRestoreIndices(isrow, &rout)); 1107 PetscCall(ISRestoreIndices(iscol, &cout)); 1108 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1109 PetscCall(MatDenseRestoreArray(X, &x)); 1110 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) { 1115 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1116 IS iscol = a->col, isrow = a->row; 1117 const PetscInt *r, *c, *rout, *cout; 1118 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1119 PetscInt nz, row; 1120 PetscScalar *x, *tmp, *tmps, sum; 1121 const PetscScalar *b; 1122 const MatScalar *aa = a->a, *v; 1123 1124 PetscFunctionBegin; 1125 if (!n) PetscFunctionReturn(0); 1126 1127 PetscCall(VecGetArrayRead(bb, &b)); 1128 PetscCall(VecGetArrayWrite(xx, &x)); 1129 tmp = a->solve_work; 1130 1131 PetscCall(ISGetIndices(isrow, &rout)); 1132 r = rout; 1133 PetscCall(ISGetIndices(iscol, &cout)); 1134 c = cout + (n - 1); 1135 1136 /* forward solve the lower triangular */ 1137 tmp[0] = b[*r++]; 1138 tmps = tmp; 1139 for (row = 1; row < n; row++) { 1140 i = rout[row]; /* permuted row */ 1141 v = aa + ai[i]; 1142 vi = aj + ai[i]; 1143 nz = a->diag[i] - ai[i]; 1144 sum = b[*r++]; 1145 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1146 tmp[row] = sum; 1147 } 1148 1149 /* backward solve the upper triangular */ 1150 for (row = n - 1; row >= 0; row--) { 1151 i = rout[row]; /* permuted row */ 1152 v = aa + a->diag[i] + 1; 1153 vi = aj + a->diag[i] + 1; 1154 nz = ai[i + 1] - a->diag[i] - 1; 1155 sum = tmp[row]; 1156 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1157 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1158 } 1159 1160 PetscCall(ISRestoreIndices(isrow, &rout)); 1161 PetscCall(ISRestoreIndices(iscol, &cout)); 1162 PetscCall(VecRestoreArrayRead(bb, &b)); 1163 PetscCall(VecRestoreArrayWrite(xx, &x)); 1164 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 /* ----------------------------------------------------------- */ 1169 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1170 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) { 1171 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1172 PetscInt n = A->rmap->n; 1173 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1174 PetscScalar *x; 1175 const PetscScalar *b; 1176 const MatScalar *aa = a->a; 1177 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1178 PetscInt adiag_i, i, nz, ai_i; 1179 const PetscInt *vi; 1180 const MatScalar *v; 1181 PetscScalar sum; 1182 #endif 1183 1184 PetscFunctionBegin; 1185 if (!n) PetscFunctionReturn(0); 1186 1187 PetscCall(VecGetArrayRead(bb, &b)); 1188 PetscCall(VecGetArrayWrite(xx, &x)); 1189 1190 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1191 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1192 #else 1193 /* forward solve the lower triangular */ 1194 x[0] = b[0]; 1195 for (i = 1; i < n; i++) { 1196 ai_i = ai[i]; 1197 v = aa + ai_i; 1198 vi = aj + ai_i; 1199 nz = adiag[i] - ai_i; 1200 sum = b[i]; 1201 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1202 x[i] = sum; 1203 } 1204 1205 /* backward solve the upper triangular */ 1206 for (i = n - 1; i >= 0; i--) { 1207 adiag_i = adiag[i]; 1208 v = aa + adiag_i + 1; 1209 vi = aj + adiag_i + 1; 1210 nz = ai[i + 1] - adiag_i - 1; 1211 sum = x[i]; 1212 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1213 x[i] = sum * aa[adiag_i]; 1214 } 1215 #endif 1216 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1217 PetscCall(VecRestoreArrayRead(bb, &b)); 1218 PetscCall(VecRestoreArrayWrite(xx, &x)); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) { 1223 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1224 IS iscol = a->col, isrow = a->row; 1225 PetscInt i, n = A->rmap->n, j; 1226 PetscInt nz; 1227 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1228 PetscScalar *x, *tmp, sum; 1229 const PetscScalar *b; 1230 const MatScalar *aa = a->a, *v; 1231 1232 PetscFunctionBegin; 1233 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1234 1235 PetscCall(VecGetArrayRead(bb, &b)); 1236 PetscCall(VecGetArray(xx, &x)); 1237 tmp = a->solve_work; 1238 1239 PetscCall(ISGetIndices(isrow, &rout)); 1240 r = rout; 1241 PetscCall(ISGetIndices(iscol, &cout)); 1242 c = cout + (n - 1); 1243 1244 /* forward solve the lower triangular */ 1245 tmp[0] = b[*r++]; 1246 for (i = 1; i < n; i++) { 1247 v = aa + ai[i]; 1248 vi = aj + ai[i]; 1249 nz = a->diag[i] - ai[i]; 1250 sum = b[*r++]; 1251 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1252 tmp[i] = sum; 1253 } 1254 1255 /* backward solve the upper triangular */ 1256 for (i = n - 1; i >= 0; i--) { 1257 v = aa + a->diag[i] + 1; 1258 vi = aj + a->diag[i] + 1; 1259 nz = ai[i + 1] - a->diag[i] - 1; 1260 sum = tmp[i]; 1261 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1262 tmp[i] = sum * aa[a->diag[i]]; 1263 x[*c--] += tmp[i]; 1264 } 1265 1266 PetscCall(ISRestoreIndices(isrow, &rout)); 1267 PetscCall(ISRestoreIndices(iscol, &cout)); 1268 PetscCall(VecRestoreArrayRead(bb, &b)); 1269 PetscCall(VecRestoreArray(xx, &x)); 1270 PetscCall(PetscLogFlops(2.0 * a->nz)); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) { 1275 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1276 IS iscol = a->col, isrow = a->row; 1277 PetscInt i, n = A->rmap->n, j; 1278 PetscInt nz; 1279 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1280 PetscScalar *x, *tmp, sum; 1281 const PetscScalar *b; 1282 const MatScalar *aa = a->a, *v; 1283 1284 PetscFunctionBegin; 1285 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1286 1287 PetscCall(VecGetArrayRead(bb, &b)); 1288 PetscCall(VecGetArray(xx, &x)); 1289 tmp = a->solve_work; 1290 1291 PetscCall(ISGetIndices(isrow, &rout)); 1292 r = rout; 1293 PetscCall(ISGetIndices(iscol, &cout)); 1294 c = cout; 1295 1296 /* forward solve the lower triangular */ 1297 tmp[0] = b[r[0]]; 1298 v = aa; 1299 vi = aj; 1300 for (i = 1; i < n; i++) { 1301 nz = ai[i + 1] - ai[i]; 1302 sum = b[r[i]]; 1303 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1304 tmp[i] = sum; 1305 v += nz; 1306 vi += nz; 1307 } 1308 1309 /* backward solve the upper triangular */ 1310 v = aa + adiag[n - 1]; 1311 vi = aj + adiag[n - 1]; 1312 for (i = n - 1; i >= 0; i--) { 1313 nz = adiag[i] - adiag[i + 1] - 1; 1314 sum = tmp[i]; 1315 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1316 tmp[i] = sum * v[nz]; 1317 x[c[i]] += tmp[i]; 1318 v += nz + 1; 1319 vi += nz + 1; 1320 } 1321 1322 PetscCall(ISRestoreIndices(isrow, &rout)); 1323 PetscCall(ISRestoreIndices(iscol, &cout)); 1324 PetscCall(VecRestoreArrayRead(bb, &b)); 1325 PetscCall(VecRestoreArray(xx, &x)); 1326 PetscCall(PetscLogFlops(2.0 * a->nz)); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) { 1331 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1332 IS iscol = a->col, isrow = a->row; 1333 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1334 PetscInt i, n = A->rmap->n, j; 1335 PetscInt nz; 1336 PetscScalar *x, *tmp, s1; 1337 const MatScalar *aa = a->a, *v; 1338 const PetscScalar *b; 1339 1340 PetscFunctionBegin; 1341 PetscCall(VecGetArrayRead(bb, &b)); 1342 PetscCall(VecGetArrayWrite(xx, &x)); 1343 tmp = a->solve_work; 1344 1345 PetscCall(ISGetIndices(isrow, &rout)); 1346 r = rout; 1347 PetscCall(ISGetIndices(iscol, &cout)); 1348 c = cout; 1349 1350 /* copy the b into temp work space according to permutation */ 1351 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1352 1353 /* forward solve the U^T */ 1354 for (i = 0; i < n; i++) { 1355 v = aa + diag[i]; 1356 vi = aj + diag[i] + 1; 1357 nz = ai[i + 1] - diag[i] - 1; 1358 s1 = tmp[i]; 1359 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1360 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1361 tmp[i] = s1; 1362 } 1363 1364 /* backward solve the L^T */ 1365 for (i = n - 1; i >= 0; i--) { 1366 v = aa + diag[i] - 1; 1367 vi = aj + diag[i] - 1; 1368 nz = diag[i] - ai[i]; 1369 s1 = tmp[i]; 1370 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1371 } 1372 1373 /* copy tmp into x according to permutation */ 1374 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1375 1376 PetscCall(ISRestoreIndices(isrow, &rout)); 1377 PetscCall(ISRestoreIndices(iscol, &cout)); 1378 PetscCall(VecRestoreArrayRead(bb, &b)); 1379 PetscCall(VecRestoreArrayWrite(xx, &x)); 1380 1381 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) { 1386 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1387 IS iscol = a->col, isrow = a->row; 1388 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1389 PetscInt i, n = A->rmap->n, j; 1390 PetscInt nz; 1391 PetscScalar *x, *tmp, s1; 1392 const MatScalar *aa = a->a, *v; 1393 const PetscScalar *b; 1394 1395 PetscFunctionBegin; 1396 PetscCall(VecGetArrayRead(bb, &b)); 1397 PetscCall(VecGetArrayWrite(xx, &x)); 1398 tmp = a->solve_work; 1399 1400 PetscCall(ISGetIndices(isrow, &rout)); 1401 r = rout; 1402 PetscCall(ISGetIndices(iscol, &cout)); 1403 c = cout; 1404 1405 /* copy the b into temp work space according to permutation */ 1406 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1407 1408 /* forward solve the U^T */ 1409 for (i = 0; i < n; i++) { 1410 v = aa + adiag[i + 1] + 1; 1411 vi = aj + adiag[i + 1] + 1; 1412 nz = adiag[i] - adiag[i + 1] - 1; 1413 s1 = tmp[i]; 1414 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1415 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1416 tmp[i] = s1; 1417 } 1418 1419 /* backward solve the L^T */ 1420 for (i = n - 1; i >= 0; i--) { 1421 v = aa + ai[i]; 1422 vi = aj + ai[i]; 1423 nz = ai[i + 1] - ai[i]; 1424 s1 = tmp[i]; 1425 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1426 } 1427 1428 /* copy tmp into x according to permutation */ 1429 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1430 1431 PetscCall(ISRestoreIndices(isrow, &rout)); 1432 PetscCall(ISRestoreIndices(iscol, &cout)); 1433 PetscCall(VecRestoreArrayRead(bb, &b)); 1434 PetscCall(VecRestoreArrayWrite(xx, &x)); 1435 1436 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) { 1441 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1442 IS iscol = a->col, isrow = a->row; 1443 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1444 PetscInt i, n = A->rmap->n, j; 1445 PetscInt nz; 1446 PetscScalar *x, *tmp, s1; 1447 const MatScalar *aa = a->a, *v; 1448 const PetscScalar *b; 1449 1450 PetscFunctionBegin; 1451 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1452 PetscCall(VecGetArrayRead(bb, &b)); 1453 PetscCall(VecGetArray(xx, &x)); 1454 tmp = a->solve_work; 1455 1456 PetscCall(ISGetIndices(isrow, &rout)); 1457 r = rout; 1458 PetscCall(ISGetIndices(iscol, &cout)); 1459 c = cout; 1460 1461 /* copy the b into temp work space according to permutation */ 1462 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1463 1464 /* forward solve the U^T */ 1465 for (i = 0; i < n; i++) { 1466 v = aa + diag[i]; 1467 vi = aj + diag[i] + 1; 1468 nz = ai[i + 1] - diag[i] - 1; 1469 s1 = tmp[i]; 1470 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1471 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1472 tmp[i] = s1; 1473 } 1474 1475 /* backward solve the L^T */ 1476 for (i = n - 1; i >= 0; i--) { 1477 v = aa + diag[i] - 1; 1478 vi = aj + diag[i] - 1; 1479 nz = diag[i] - ai[i]; 1480 s1 = tmp[i]; 1481 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1482 } 1483 1484 /* copy tmp into x according to permutation */ 1485 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1486 1487 PetscCall(ISRestoreIndices(isrow, &rout)); 1488 PetscCall(ISRestoreIndices(iscol, &cout)); 1489 PetscCall(VecRestoreArrayRead(bb, &b)); 1490 PetscCall(VecRestoreArray(xx, &x)); 1491 1492 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) { 1497 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1498 IS iscol = a->col, isrow = a->row; 1499 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1500 PetscInt i, n = A->rmap->n, j; 1501 PetscInt nz; 1502 PetscScalar *x, *tmp, s1; 1503 const MatScalar *aa = a->a, *v; 1504 const PetscScalar *b; 1505 1506 PetscFunctionBegin; 1507 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1508 PetscCall(VecGetArrayRead(bb, &b)); 1509 PetscCall(VecGetArray(xx, &x)); 1510 tmp = a->solve_work; 1511 1512 PetscCall(ISGetIndices(isrow, &rout)); 1513 r = rout; 1514 PetscCall(ISGetIndices(iscol, &cout)); 1515 c = cout; 1516 1517 /* copy the b into temp work space according to permutation */ 1518 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1519 1520 /* forward solve the U^T */ 1521 for (i = 0; i < n; i++) { 1522 v = aa + adiag[i + 1] + 1; 1523 vi = aj + adiag[i + 1] + 1; 1524 nz = adiag[i] - adiag[i + 1] - 1; 1525 s1 = tmp[i]; 1526 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1527 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1528 tmp[i] = s1; 1529 } 1530 1531 /* backward solve the L^T */ 1532 for (i = n - 1; i >= 0; i--) { 1533 v = aa + ai[i]; 1534 vi = aj + ai[i]; 1535 nz = ai[i + 1] - ai[i]; 1536 s1 = tmp[i]; 1537 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1538 } 1539 1540 /* copy tmp into x according to permutation */ 1541 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1542 1543 PetscCall(ISRestoreIndices(isrow, &rout)); 1544 PetscCall(ISRestoreIndices(iscol, &cout)); 1545 PetscCall(VecRestoreArrayRead(bb, &b)); 1546 PetscCall(VecRestoreArray(xx, &x)); 1547 1548 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 /* ----------------------------------------------------------------*/ 1553 1554 /* 1555 ilu() under revised new data structure. 1556 Factored arrays bj and ba are stored as 1557 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1558 1559 bi=fact->i is an array of size n+1, in which 1560 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1561 bi[n]: points to L(n-1,n-1)+1 1562 1563 bdiag=fact->diag is an array of size n+1,in which 1564 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1565 bdiag[n]: points to entry of U(n-1,0)-1 1566 1567 U(i,:) contains bdiag[i] as its last entry, i.e., 1568 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1569 */ 1570 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1571 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1572 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1573 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1574 IS isicol; 1575 1576 PetscFunctionBegin; 1577 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1578 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1579 b = (Mat_SeqAIJ *)(fact)->data; 1580 1581 /* allocate matrix arrays for new data structure */ 1582 PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 1583 PetscCall(PetscLogObjectMemory((PetscObject)fact, ai[n] * (sizeof(PetscScalar) + sizeof(PetscInt)) + (n + 1) * sizeof(PetscInt))); 1584 1585 b->singlemalloc = PETSC_TRUE; 1586 if (!b->diag) { 1587 PetscCall(PetscMalloc1(n + 1, &b->diag)); 1588 PetscCall(PetscLogObjectMemory((PetscObject)fact, (n + 1) * sizeof(PetscInt))); 1589 } 1590 bdiag = b->diag; 1591 1592 if (n > 0) { PetscCall(PetscArrayzero(b->a, ai[n])); } 1593 1594 /* set bi and bj with new data structure */ 1595 bi = b->i; 1596 bj = b->j; 1597 1598 /* L part */ 1599 bi[0] = 0; 1600 for (i = 0; i < n; i++) { 1601 nz = adiag[i] - ai[i]; 1602 bi[i + 1] = bi[i] + nz; 1603 aj = a->j + ai[i]; 1604 for (j = 0; j < nz; j++) { bj[k++] = aj[j]; } 1605 } 1606 1607 /* U part */ 1608 bdiag[n] = bi[n] - 1; 1609 for (i = n - 1; i >= 0; i--) { 1610 nz = ai[i + 1] - adiag[i] - 1; 1611 aj = a->j + adiag[i] + 1; 1612 for (j = 0; j < nz; j++) { bj[k++] = aj[j]; } 1613 /* diag[i] */ 1614 bj[k++] = i; 1615 bdiag[i] = bdiag[i + 1] + nz + 1; 1616 } 1617 1618 fact->factortype = MAT_FACTOR_ILU; 1619 fact->info.factor_mallocs = 0; 1620 fact->info.fill_ratio_given = info->fill; 1621 fact->info.fill_ratio_needed = 1.0; 1622 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1623 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1624 1625 b = (Mat_SeqAIJ *)(fact)->data; 1626 b->row = isrow; 1627 b->col = iscol; 1628 b->icol = isicol; 1629 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1630 PetscCall(PetscObjectReference((PetscObject)isrow)); 1631 PetscCall(PetscObjectReference((PetscObject)iscol)); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1636 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1637 IS isicol; 1638 const PetscInt *r, *ic; 1639 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1640 PetscInt *bi, *cols, nnz, *cols_lvl; 1641 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1642 PetscInt i, levels, diagonal_fill; 1643 PetscBool col_identity, row_identity, missing; 1644 PetscReal f; 1645 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1646 PetscBT lnkbt; 1647 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1648 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1649 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1650 1651 PetscFunctionBegin; 1652 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1653 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1654 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1655 1656 levels = (PetscInt)info->levels; 1657 PetscCall(ISIdentity(isrow, &row_identity)); 1658 PetscCall(ISIdentity(iscol, &col_identity)); 1659 if (!levels && row_identity && col_identity) { 1660 /* special case: ilu(0) with natural ordering */ 1661 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1662 if (a->inode.size) { fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; } 1663 PetscFunctionReturn(0); 1664 } 1665 1666 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1667 PetscCall(ISGetIndices(isrow, &r)); 1668 PetscCall(ISGetIndices(isicol, &ic)); 1669 1670 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1671 PetscCall(PetscMalloc1(n + 1, &bi)); 1672 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1673 bi[0] = bdiag[0] = 0; 1674 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1675 1676 /* create a linked list for storing column indices of the active row */ 1677 nlnk = n + 1; 1678 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1679 1680 /* initial FreeSpace size is f*(ai[n]+1) */ 1681 f = info->fill; 1682 diagonal_fill = (PetscInt)info->diagonal_fill; 1683 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1684 current_space = free_space; 1685 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1686 current_space_lvl = free_space_lvl; 1687 for (i = 0; i < n; i++) { 1688 nzi = 0; 1689 /* copy current row into linked list */ 1690 nnz = ai[r[i] + 1] - ai[r[i]]; 1691 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1692 cols = aj + ai[r[i]]; 1693 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1694 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1695 nzi += nlnk; 1696 1697 /* make sure diagonal entry is included */ 1698 if (diagonal_fill && lnk[i] == -1) { 1699 fm = n; 1700 while (lnk[fm] < i) fm = lnk[fm]; 1701 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1702 lnk[fm] = i; 1703 lnk_lvl[i] = 0; 1704 nzi++; 1705 dcount++; 1706 } 1707 1708 /* add pivot rows into the active row */ 1709 nzbd = 0; 1710 prow = lnk[n]; 1711 while (prow < i) { 1712 nnz = bdiag[prow]; 1713 cols = bj_ptr[prow] + nnz + 1; 1714 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1715 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1716 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1717 nzi += nlnk; 1718 prow = lnk[prow]; 1719 nzbd++; 1720 } 1721 bdiag[i] = nzbd; 1722 bi[i + 1] = bi[i] + nzi; 1723 /* if free space is not available, make more free space */ 1724 if (current_space->local_remaining < nzi) { 1725 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1726 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1727 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1728 reallocs++; 1729 } 1730 1731 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1732 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1733 bj_ptr[i] = current_space->array; 1734 bjlvl_ptr[i] = current_space_lvl->array; 1735 1736 /* make sure the active row i has diagonal entry */ 1737 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1738 1739 current_space->array += nzi; 1740 current_space->local_used += nzi; 1741 current_space->local_remaining -= nzi; 1742 current_space_lvl->array += nzi; 1743 current_space_lvl->local_used += nzi; 1744 current_space_lvl->local_remaining -= nzi; 1745 } 1746 1747 PetscCall(ISRestoreIndices(isrow, &r)); 1748 PetscCall(ISRestoreIndices(isicol, &ic)); 1749 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1750 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1751 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1752 1753 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1754 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1755 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1756 1757 #if defined(PETSC_USE_INFO) 1758 { 1759 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1760 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1761 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1762 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1763 PetscCall(PetscInfo(A, "for best performance.\n")); 1764 if (diagonal_fill) { PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); } 1765 } 1766 #endif 1767 /* put together the new matrix */ 1768 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1769 PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol)); 1770 b = (Mat_SeqAIJ *)(fact)->data; 1771 1772 b->free_a = PETSC_TRUE; 1773 b->free_ij = PETSC_TRUE; 1774 b->singlemalloc = PETSC_FALSE; 1775 1776 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 1777 1778 b->j = bj; 1779 b->i = bi; 1780 b->diag = bdiag; 1781 b->ilen = NULL; 1782 b->imax = NULL; 1783 b->row = isrow; 1784 b->col = iscol; 1785 PetscCall(PetscObjectReference((PetscObject)isrow)); 1786 PetscCall(PetscObjectReference((PetscObject)iscol)); 1787 b->icol = isicol; 1788 1789 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1790 /* In b structure: Free imax, ilen, old a, old j. 1791 Allocate bdiag, solve_work, new a, new j */ 1792 PetscCall(PetscLogObjectMemory((PetscObject)fact, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 1793 b->maxnz = b->nz = bdiag[0] + 1; 1794 1795 (fact)->info.factor_mallocs = reallocs; 1796 (fact)->info.fill_ratio_given = f; 1797 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1798 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1799 if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; } 1800 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1801 PetscFunctionReturn(0); 1802 } 1803 1804 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 1805 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1806 IS isicol; 1807 const PetscInt *r, *ic; 1808 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1809 PetscInt *bi, *cols, nnz, *cols_lvl; 1810 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1811 PetscInt i, levels, diagonal_fill; 1812 PetscBool col_identity, row_identity; 1813 PetscReal f; 1814 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1815 PetscBT lnkbt; 1816 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1817 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1818 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1819 PetscBool missing; 1820 1821 PetscFunctionBegin; 1822 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1823 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1824 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1825 1826 f = info->fill; 1827 levels = (PetscInt)info->levels; 1828 diagonal_fill = (PetscInt)info->diagonal_fill; 1829 1830 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1831 1832 PetscCall(ISIdentity(isrow, &row_identity)); 1833 PetscCall(ISIdentity(iscol, &col_identity)); 1834 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1835 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1836 1837 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1838 if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; } 1839 fact->factortype = MAT_FACTOR_ILU; 1840 (fact)->info.factor_mallocs = 0; 1841 (fact)->info.fill_ratio_given = info->fill; 1842 (fact)->info.fill_ratio_needed = 1.0; 1843 1844 b = (Mat_SeqAIJ *)(fact)->data; 1845 b->row = isrow; 1846 b->col = iscol; 1847 b->icol = isicol; 1848 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1849 PetscCall(PetscObjectReference((PetscObject)isrow)); 1850 PetscCall(PetscObjectReference((PetscObject)iscol)); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscCall(ISGetIndices(isrow, &r)); 1855 PetscCall(ISGetIndices(isicol, &ic)); 1856 1857 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1858 PetscCall(PetscMalloc1(n + 1, &bi)); 1859 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1860 bi[0] = bdiag[0] = 0; 1861 1862 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1863 1864 /* create a linked list for storing column indices of the active row */ 1865 nlnk = n + 1; 1866 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1867 1868 /* initial FreeSpace size is f*(ai[n]+1) */ 1869 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1870 current_space = free_space; 1871 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1872 current_space_lvl = free_space_lvl; 1873 1874 for (i = 0; i < n; i++) { 1875 nzi = 0; 1876 /* copy current row into linked list */ 1877 nnz = ai[r[i] + 1] - ai[r[i]]; 1878 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1879 cols = aj + ai[r[i]]; 1880 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1881 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1882 nzi += nlnk; 1883 1884 /* make sure diagonal entry is included */ 1885 if (diagonal_fill && lnk[i] == -1) { 1886 fm = n; 1887 while (lnk[fm] < i) fm = lnk[fm]; 1888 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1889 lnk[fm] = i; 1890 lnk_lvl[i] = 0; 1891 nzi++; 1892 dcount++; 1893 } 1894 1895 /* add pivot rows into the active row */ 1896 nzbd = 0; 1897 prow = lnk[n]; 1898 while (prow < i) { 1899 nnz = bdiag[prow]; 1900 cols = bj_ptr[prow] + nnz + 1; 1901 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1902 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1903 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1904 nzi += nlnk; 1905 prow = lnk[prow]; 1906 nzbd++; 1907 } 1908 bdiag[i] = nzbd; 1909 bi[i + 1] = bi[i] + nzi; 1910 1911 /* if free space is not available, make more free space */ 1912 if (current_space->local_remaining < nzi) { 1913 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1914 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1915 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1916 reallocs++; 1917 } 1918 1919 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1920 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1921 bj_ptr[i] = current_space->array; 1922 bjlvl_ptr[i] = current_space_lvl->array; 1923 1924 /* make sure the active row i has diagonal entry */ 1925 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1926 1927 current_space->array += nzi; 1928 current_space->local_used += nzi; 1929 current_space->local_remaining -= nzi; 1930 current_space_lvl->array += nzi; 1931 current_space_lvl->local_used += nzi; 1932 current_space_lvl->local_remaining -= nzi; 1933 } 1934 1935 PetscCall(ISRestoreIndices(isrow, &r)); 1936 PetscCall(ISRestoreIndices(isicol, &ic)); 1937 1938 /* destroy list of free space and other temporary arrays */ 1939 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1940 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 1941 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1942 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1943 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1944 1945 #if defined(PETSC_USE_INFO) 1946 { 1947 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1948 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1949 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1950 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1951 PetscCall(PetscInfo(A, "for best performance.\n")); 1952 if (diagonal_fill) { PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); } 1953 } 1954 #endif 1955 1956 /* put together the new matrix */ 1957 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1958 PetscCall(PetscLogObjectParent((PetscObject)fact, (PetscObject)isicol)); 1959 b = (Mat_SeqAIJ *)(fact)->data; 1960 1961 b->free_a = PETSC_TRUE; 1962 b->free_ij = PETSC_TRUE; 1963 b->singlemalloc = PETSC_FALSE; 1964 1965 PetscCall(PetscMalloc1(bi[n], &b->a)); 1966 b->j = bj; 1967 b->i = bi; 1968 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 1969 b->diag = bdiag; 1970 b->ilen = NULL; 1971 b->imax = NULL; 1972 b->row = isrow; 1973 b->col = iscol; 1974 PetscCall(PetscObjectReference((PetscObject)isrow)); 1975 PetscCall(PetscObjectReference((PetscObject)iscol)); 1976 b->icol = isicol; 1977 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1978 /* In b structure: Free imax, ilen, old a, old j. 1979 Allocate bdiag, solve_work, new a, new j */ 1980 PetscCall(PetscLogObjectMemory((PetscObject)fact, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar)))); 1981 b->maxnz = b->nz = bi[n]; 1982 1983 (fact)->info.factor_mallocs = reallocs; 1984 (fact)->info.fill_ratio_given = f; 1985 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1986 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1987 if (a->inode.size) { (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) { 1992 Mat C = B; 1993 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1994 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 1995 IS ip = b->row, iip = b->icol; 1996 const PetscInt *rip, *riip; 1997 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1998 PetscInt *ai = a->i, *aj = a->j; 1999 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2000 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2001 PetscBool perm_identity; 2002 FactorShiftCtx sctx; 2003 PetscReal rs; 2004 MatScalar d, *v; 2005 2006 PetscFunctionBegin; 2007 /* MatPivotSetUp(): initialize shift context sctx */ 2008 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2009 2010 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2011 sctx.shift_top = info->zeropivot; 2012 for (i = 0; i < mbs; i++) { 2013 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2014 d = (aa)[a->diag[i]]; 2015 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2016 v = aa + ai[i]; 2017 nz = ai[i + 1] - ai[i]; 2018 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2019 if (rs > sctx.shift_top) sctx.shift_top = rs; 2020 } 2021 sctx.shift_top *= 1.1; 2022 sctx.nshift_max = 5; 2023 sctx.shift_lo = 0.; 2024 sctx.shift_hi = 1.; 2025 } 2026 2027 PetscCall(ISGetIndices(ip, &rip)); 2028 PetscCall(ISGetIndices(iip, &riip)); 2029 2030 /* allocate working arrays 2031 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2032 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2033 */ 2034 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2035 2036 do { 2037 sctx.newshift = PETSC_FALSE; 2038 2039 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2040 if (mbs) il[0] = 0; 2041 2042 for (k = 0; k < mbs; k++) { 2043 /* zero rtmp */ 2044 nz = bi[k + 1] - bi[k]; 2045 bjtmp = bj + bi[k]; 2046 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2047 2048 /* load in initial unfactored row */ 2049 bval = ba + bi[k]; 2050 jmin = ai[rip[k]]; 2051 jmax = ai[rip[k] + 1]; 2052 for (j = jmin; j < jmax; j++) { 2053 col = riip[aj[j]]; 2054 if (col >= k) { /* only take upper triangular entry */ 2055 rtmp[col] = aa[j]; 2056 *bval++ = 0.0; /* for in-place factorization */ 2057 } 2058 } 2059 /* shift the diagonal of the matrix: ZeropivotApply() */ 2060 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2061 2062 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2063 dk = rtmp[k]; 2064 i = c2r[k]; /* first row to be added to k_th row */ 2065 2066 while (i < k) { 2067 nexti = c2r[i]; /* next row to be added to k_th row */ 2068 2069 /* compute multiplier, update diag(k) and U(i,k) */ 2070 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2071 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2072 dk += uikdi * ba[ili]; /* update diag[k] */ 2073 ba[ili] = uikdi; /* -U(i,k) */ 2074 2075 /* add multiple of row i to k-th row */ 2076 jmin = ili + 1; 2077 jmax = bi[i + 1]; 2078 if (jmin < jmax) { 2079 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2080 /* update il and c2r for row i */ 2081 il[i] = jmin; 2082 j = bj[jmin]; 2083 c2r[i] = c2r[j]; 2084 c2r[j] = i; 2085 } 2086 i = nexti; 2087 } 2088 2089 /* copy data into U(k,:) */ 2090 rs = 0.0; 2091 jmin = bi[k]; 2092 jmax = bi[k + 1] - 1; 2093 if (jmin < jmax) { 2094 for (j = jmin; j < jmax; j++) { 2095 col = bj[j]; 2096 ba[j] = rtmp[col]; 2097 rs += PetscAbsScalar(ba[j]); 2098 } 2099 /* add the k-th row into il and c2r */ 2100 il[k] = jmin; 2101 i = bj[jmin]; 2102 c2r[k] = c2r[i]; 2103 c2r[i] = k; 2104 } 2105 2106 /* MatPivotCheck() */ 2107 sctx.rs = rs; 2108 sctx.pv = dk; 2109 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2110 if (sctx.newshift) break; 2111 dk = sctx.pv; 2112 2113 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2114 } 2115 } while (sctx.newshift); 2116 2117 PetscCall(PetscFree3(rtmp, il, c2r)); 2118 PetscCall(ISRestoreIndices(ip, &rip)); 2119 PetscCall(ISRestoreIndices(iip, &riip)); 2120 2121 PetscCall(ISIdentity(ip, &perm_identity)); 2122 if (perm_identity) { 2123 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2124 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2125 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2126 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2127 } else { 2128 B->ops->solve = MatSolve_SeqSBAIJ_1; 2129 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2130 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2131 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2132 } 2133 2134 C->assembled = PETSC_TRUE; 2135 C->preallocated = PETSC_TRUE; 2136 2137 PetscCall(PetscLogFlops(C->rmap->n)); 2138 2139 /* MatPivotView() */ 2140 if (sctx.nshift) { 2141 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2142 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2143 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2144 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2145 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2146 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2147 } 2148 } 2149 PetscFunctionReturn(0); 2150 } 2151 2152 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) { 2153 Mat C = B; 2154 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2155 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2156 IS ip = b->row, iip = b->icol; 2157 const PetscInt *rip, *riip; 2158 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2159 PetscInt *ai = a->i, *aj = a->j; 2160 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2161 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2162 PetscBool perm_identity; 2163 FactorShiftCtx sctx; 2164 PetscReal rs; 2165 MatScalar d, *v; 2166 2167 PetscFunctionBegin; 2168 /* MatPivotSetUp(): initialize shift context sctx */ 2169 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2170 2171 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2172 sctx.shift_top = info->zeropivot; 2173 for (i = 0; i < mbs; i++) { 2174 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2175 d = (aa)[a->diag[i]]; 2176 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2177 v = aa + ai[i]; 2178 nz = ai[i + 1] - ai[i]; 2179 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2180 if (rs > sctx.shift_top) sctx.shift_top = rs; 2181 } 2182 sctx.shift_top *= 1.1; 2183 sctx.nshift_max = 5; 2184 sctx.shift_lo = 0.; 2185 sctx.shift_hi = 1.; 2186 } 2187 2188 PetscCall(ISGetIndices(ip, &rip)); 2189 PetscCall(ISGetIndices(iip, &riip)); 2190 2191 /* initialization */ 2192 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2193 2194 do { 2195 sctx.newshift = PETSC_FALSE; 2196 2197 for (i = 0; i < mbs; i++) jl[i] = mbs; 2198 il[0] = 0; 2199 2200 for (k = 0; k < mbs; k++) { 2201 /* zero rtmp */ 2202 nz = bi[k + 1] - bi[k]; 2203 bjtmp = bj + bi[k]; 2204 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2205 2206 bval = ba + bi[k]; 2207 /* initialize k-th row by the perm[k]-th row of A */ 2208 jmin = ai[rip[k]]; 2209 jmax = ai[rip[k] + 1]; 2210 for (j = jmin; j < jmax; j++) { 2211 col = riip[aj[j]]; 2212 if (col >= k) { /* only take upper triangular entry */ 2213 rtmp[col] = aa[j]; 2214 *bval++ = 0.0; /* for in-place factorization */ 2215 } 2216 } 2217 /* shift the diagonal of the matrix */ 2218 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2219 2220 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2221 dk = rtmp[k]; 2222 i = jl[k]; /* first row to be added to k_th row */ 2223 2224 while (i < k) { 2225 nexti = jl[i]; /* next row to be added to k_th row */ 2226 2227 /* compute multiplier, update diag(k) and U(i,k) */ 2228 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2229 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2230 dk += uikdi * ba[ili]; 2231 ba[ili] = uikdi; /* -U(i,k) */ 2232 2233 /* add multiple of row i to k-th row */ 2234 jmin = ili + 1; 2235 jmax = bi[i + 1]; 2236 if (jmin < jmax) { 2237 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2238 /* update il and jl for row i */ 2239 il[i] = jmin; 2240 j = bj[jmin]; 2241 jl[i] = jl[j]; 2242 jl[j] = i; 2243 } 2244 i = nexti; 2245 } 2246 2247 /* shift the diagonals when zero pivot is detected */ 2248 /* compute rs=sum of abs(off-diagonal) */ 2249 rs = 0.0; 2250 jmin = bi[k] + 1; 2251 nz = bi[k + 1] - jmin; 2252 bcol = bj + jmin; 2253 for (j = 0; j < nz; j++) { rs += PetscAbsScalar(rtmp[bcol[j]]); } 2254 2255 sctx.rs = rs; 2256 sctx.pv = dk; 2257 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2258 if (sctx.newshift) break; 2259 dk = sctx.pv; 2260 2261 /* copy data into U(k,:) */ 2262 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2263 jmin = bi[k] + 1; 2264 jmax = bi[k + 1]; 2265 if (jmin < jmax) { 2266 for (j = jmin; j < jmax; j++) { 2267 col = bj[j]; 2268 ba[j] = rtmp[col]; 2269 } 2270 /* add the k-th row into il and jl */ 2271 il[k] = jmin; 2272 i = bj[jmin]; 2273 jl[k] = jl[i]; 2274 jl[i] = k; 2275 } 2276 } 2277 } while (sctx.newshift); 2278 2279 PetscCall(PetscFree3(rtmp, il, jl)); 2280 PetscCall(ISRestoreIndices(ip, &rip)); 2281 PetscCall(ISRestoreIndices(iip, &riip)); 2282 2283 PetscCall(ISIdentity(ip, &perm_identity)); 2284 if (perm_identity) { 2285 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2286 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2287 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2288 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2289 } else { 2290 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2291 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2292 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2293 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2294 } 2295 2296 C->assembled = PETSC_TRUE; 2297 C->preallocated = PETSC_TRUE; 2298 2299 PetscCall(PetscLogFlops(C->rmap->n)); 2300 if (sctx.nshift) { 2301 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2302 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2303 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2304 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2305 } 2306 } 2307 PetscFunctionReturn(0); 2308 } 2309 2310 /* 2311 icc() under revised new data structure. 2312 Factored arrays bj and ba are stored as 2313 U(0,:),...,U(i,:),U(n-1,:) 2314 2315 ui=fact->i is an array of size n+1, in which 2316 ui+ 2317 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2318 ui[n]: points to U(n-1,n-1)+1 2319 2320 udiag=fact->diag is an array of size n,in which 2321 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2322 2323 U(i,:) contains udiag[i] as its last entry, i.e., 2324 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2325 */ 2326 2327 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2328 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2329 Mat_SeqSBAIJ *b; 2330 PetscBool perm_identity, missing; 2331 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2332 const PetscInt *rip, *riip; 2333 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2334 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2335 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2336 PetscReal fill = info->fill, levels = info->levels; 2337 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2338 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2339 PetscBT lnkbt; 2340 IS iperm; 2341 2342 PetscFunctionBegin; 2343 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2344 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2345 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2346 PetscCall(ISIdentity(perm, &perm_identity)); 2347 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2348 2349 PetscCall(PetscMalloc1(am + 1, &ui)); 2350 PetscCall(PetscMalloc1(am + 1, &udiag)); 2351 ui[0] = 0; 2352 2353 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2354 if (!levels && perm_identity) { 2355 for (i = 0; i < am; i++) { 2356 ncols = ai[i + 1] - a->diag[i]; 2357 ui[i + 1] = ui[i] + ncols; 2358 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2359 } 2360 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2361 cols = uj; 2362 for (i = 0; i < am; i++) { 2363 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2364 ncols = ai[i + 1] - a->diag[i] - 1; 2365 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2366 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2367 } 2368 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2369 PetscCall(ISGetIndices(iperm, &riip)); 2370 PetscCall(ISGetIndices(perm, &rip)); 2371 2372 /* initialization */ 2373 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2374 2375 /* jl: linked list for storing indices of the pivot rows 2376 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2377 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2378 for (i = 0; i < am; i++) { 2379 jl[i] = am; 2380 il[i] = 0; 2381 } 2382 2383 /* create and initialize a linked list for storing column indices of the active row k */ 2384 nlnk = am + 1; 2385 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2386 2387 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2388 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2389 current_space = free_space; 2390 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2391 current_space_lvl = free_space_lvl; 2392 2393 for (k = 0; k < am; k++) { /* for each active row k */ 2394 /* initialize lnk by the column indices of row rip[k] of A */ 2395 nzk = 0; 2396 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2397 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2398 ncols_upper = 0; 2399 for (j = 0; j < ncols; j++) { 2400 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2401 if (riip[i] >= k) { /* only take upper triangular entry */ 2402 ajtmp[ncols_upper] = i; 2403 ncols_upper++; 2404 } 2405 } 2406 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2407 nzk += nlnk; 2408 2409 /* update lnk by computing fill-in for each pivot row to be merged in */ 2410 prow = jl[k]; /* 1st pivot row */ 2411 2412 while (prow < k) { 2413 nextprow = jl[prow]; 2414 2415 /* merge prow into k-th row */ 2416 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2417 jmax = ui[prow + 1]; 2418 ncols = jmax - jmin; 2419 i = jmin - ui[prow]; 2420 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2421 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2422 j = *(uj - 1); 2423 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2424 nzk += nlnk; 2425 2426 /* update il and jl for prow */ 2427 if (jmin < jmax) { 2428 il[prow] = jmin; 2429 j = *cols; 2430 jl[prow] = jl[j]; 2431 jl[j] = prow; 2432 } 2433 prow = nextprow; 2434 } 2435 2436 /* if free space is not available, make more free space */ 2437 if (current_space->local_remaining < nzk) { 2438 i = am - k + 1; /* num of unfactored rows */ 2439 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2440 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2441 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2442 reallocs++; 2443 } 2444 2445 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2446 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2447 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2448 2449 /* add the k-th row into il and jl */ 2450 if (nzk > 1) { 2451 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2452 jl[k] = jl[i]; 2453 jl[i] = k; 2454 il[k] = ui[k] + 1; 2455 } 2456 uj_ptr[k] = current_space->array; 2457 uj_lvl_ptr[k] = current_space_lvl->array; 2458 2459 current_space->array += nzk; 2460 current_space->local_used += nzk; 2461 current_space->local_remaining -= nzk; 2462 2463 current_space_lvl->array += nzk; 2464 current_space_lvl->local_used += nzk; 2465 current_space_lvl->local_remaining -= nzk; 2466 2467 ui[k + 1] = ui[k] + nzk; 2468 } 2469 2470 PetscCall(ISRestoreIndices(perm, &rip)); 2471 PetscCall(ISRestoreIndices(iperm, &riip)); 2472 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2473 PetscCall(PetscFree(ajtmp)); 2474 2475 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2476 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2477 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2478 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2479 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2480 2481 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2482 2483 /* put together the new matrix in MATSEQSBAIJ format */ 2484 b = (Mat_SeqSBAIJ *)(fact)->data; 2485 b->singlemalloc = PETSC_FALSE; 2486 2487 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2488 2489 b->j = uj; 2490 b->i = ui; 2491 b->diag = udiag; 2492 b->free_diag = PETSC_TRUE; 2493 b->ilen = NULL; 2494 b->imax = NULL; 2495 b->row = perm; 2496 b->col = perm; 2497 PetscCall(PetscObjectReference((PetscObject)perm)); 2498 PetscCall(PetscObjectReference((PetscObject)perm)); 2499 b->icol = iperm; 2500 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2501 2502 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2503 PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar)))); 2504 2505 b->maxnz = b->nz = ui[am]; 2506 b->free_a = PETSC_TRUE; 2507 b->free_ij = PETSC_TRUE; 2508 2509 fact->info.factor_mallocs = reallocs; 2510 fact->info.fill_ratio_given = fill; 2511 if (ai[am] != 0) { 2512 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2513 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2514 } else { 2515 fact->info.fill_ratio_needed = 0.0; 2516 } 2517 #if defined(PETSC_USE_INFO) 2518 if (ai[am] != 0) { 2519 PetscReal af = fact->info.fill_ratio_needed; 2520 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2521 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2522 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2523 } else { 2524 PetscCall(PetscInfo(A, "Empty matrix\n")); 2525 } 2526 #endif 2527 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2528 PetscFunctionReturn(0); 2529 } 2530 2531 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2532 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2533 Mat_SeqSBAIJ *b; 2534 PetscBool perm_identity, missing; 2535 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2536 const PetscInt *rip, *riip; 2537 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2538 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2539 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2540 PetscReal fill = info->fill, levels = info->levels; 2541 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2542 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2543 PetscBT lnkbt; 2544 IS iperm; 2545 2546 PetscFunctionBegin; 2547 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2548 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2549 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2550 PetscCall(ISIdentity(perm, &perm_identity)); 2551 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2552 2553 PetscCall(PetscMalloc1(am + 1, &ui)); 2554 PetscCall(PetscMalloc1(am + 1, &udiag)); 2555 ui[0] = 0; 2556 2557 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2558 if (!levels && perm_identity) { 2559 for (i = 0; i < am; i++) { 2560 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2561 udiag[i] = ui[i]; 2562 } 2563 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2564 cols = uj; 2565 for (i = 0; i < am; i++) { 2566 aj = a->j + a->diag[i]; 2567 ncols = ui[i + 1] - ui[i]; 2568 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2569 } 2570 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2571 PetscCall(ISGetIndices(iperm, &riip)); 2572 PetscCall(ISGetIndices(perm, &rip)); 2573 2574 /* initialization */ 2575 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2576 2577 /* jl: linked list for storing indices of the pivot rows 2578 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2579 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2580 for (i = 0; i < am; i++) { 2581 jl[i] = am; 2582 il[i] = 0; 2583 } 2584 2585 /* create and initialize a linked list for storing column indices of the active row k */ 2586 nlnk = am + 1; 2587 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2588 2589 /* initial FreeSpace size is fill*(ai[am]+1) */ 2590 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2591 current_space = free_space; 2592 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2593 current_space_lvl = free_space_lvl; 2594 2595 for (k = 0; k < am; k++) { /* for each active row k */ 2596 /* initialize lnk by the column indices of row rip[k] of A */ 2597 nzk = 0; 2598 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2599 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2600 ncols_upper = 0; 2601 for (j = 0; j < ncols; j++) { 2602 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2603 if (riip[i] >= k) { /* only take upper triangular entry */ 2604 ajtmp[ncols_upper] = i; 2605 ncols_upper++; 2606 } 2607 } 2608 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2609 nzk += nlnk; 2610 2611 /* update lnk by computing fill-in for each pivot row to be merged in */ 2612 prow = jl[k]; /* 1st pivot row */ 2613 2614 while (prow < k) { 2615 nextprow = jl[prow]; 2616 2617 /* merge prow into k-th row */ 2618 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2619 jmax = ui[prow + 1]; 2620 ncols = jmax - jmin; 2621 i = jmin - ui[prow]; 2622 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2623 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2624 j = *(uj - 1); 2625 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2626 nzk += nlnk; 2627 2628 /* update il and jl for prow */ 2629 if (jmin < jmax) { 2630 il[prow] = jmin; 2631 j = *cols; 2632 jl[prow] = jl[j]; 2633 jl[j] = prow; 2634 } 2635 prow = nextprow; 2636 } 2637 2638 /* if free space is not available, make more free space */ 2639 if (current_space->local_remaining < nzk) { 2640 i = am - k + 1; /* num of unfactored rows */ 2641 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2642 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2643 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2644 reallocs++; 2645 } 2646 2647 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2648 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2649 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2650 2651 /* add the k-th row into il and jl */ 2652 if (nzk > 1) { 2653 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2654 jl[k] = jl[i]; 2655 jl[i] = k; 2656 il[k] = ui[k] + 1; 2657 } 2658 uj_ptr[k] = current_space->array; 2659 uj_lvl_ptr[k] = current_space_lvl->array; 2660 2661 current_space->array += nzk; 2662 current_space->local_used += nzk; 2663 current_space->local_remaining -= nzk; 2664 2665 current_space_lvl->array += nzk; 2666 current_space_lvl->local_used += nzk; 2667 current_space_lvl->local_remaining -= nzk; 2668 2669 ui[k + 1] = ui[k] + nzk; 2670 } 2671 2672 #if defined(PETSC_USE_INFO) 2673 if (ai[am] != 0) { 2674 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2675 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2676 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2677 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2678 } else { 2679 PetscCall(PetscInfo(A, "Empty matrix\n")); 2680 } 2681 #endif 2682 2683 PetscCall(ISRestoreIndices(perm, &rip)); 2684 PetscCall(ISRestoreIndices(iperm, &riip)); 2685 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2686 PetscCall(PetscFree(ajtmp)); 2687 2688 /* destroy list of free space and other temporary array(s) */ 2689 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2690 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2691 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2692 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2693 2694 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2695 2696 /* put together the new matrix in MATSEQSBAIJ format */ 2697 2698 b = (Mat_SeqSBAIJ *)fact->data; 2699 b->singlemalloc = PETSC_FALSE; 2700 2701 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2702 2703 b->j = uj; 2704 b->i = ui; 2705 b->diag = udiag; 2706 b->free_diag = PETSC_TRUE; 2707 b->ilen = NULL; 2708 b->imax = NULL; 2709 b->row = perm; 2710 b->col = perm; 2711 2712 PetscCall(PetscObjectReference((PetscObject)perm)); 2713 PetscCall(PetscObjectReference((PetscObject)perm)); 2714 2715 b->icol = iperm; 2716 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2717 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2718 PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar)))); 2719 b->maxnz = b->nz = ui[am]; 2720 b->free_a = PETSC_TRUE; 2721 b->free_ij = PETSC_TRUE; 2722 2723 fact->info.factor_mallocs = reallocs; 2724 fact->info.fill_ratio_given = fill; 2725 if (ai[am] != 0) { 2726 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2727 } else { 2728 fact->info.fill_ratio_needed = 0.0; 2729 } 2730 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2731 PetscFunctionReturn(0); 2732 } 2733 2734 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2735 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2736 Mat_SeqSBAIJ *b; 2737 PetscBool perm_identity, missing; 2738 PetscReal fill = info->fill; 2739 const PetscInt *rip, *riip; 2740 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2741 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2742 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2743 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2744 PetscBT lnkbt; 2745 IS iperm; 2746 2747 PetscFunctionBegin; 2748 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2749 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2750 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2751 2752 /* check whether perm is the identity mapping */ 2753 PetscCall(ISIdentity(perm, &perm_identity)); 2754 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2755 PetscCall(ISGetIndices(iperm, &riip)); 2756 PetscCall(ISGetIndices(perm, &rip)); 2757 2758 /* initialization */ 2759 PetscCall(PetscMalloc1(am + 1, &ui)); 2760 PetscCall(PetscMalloc1(am + 1, &udiag)); 2761 ui[0] = 0; 2762 2763 /* jl: linked list for storing indices of the pivot rows 2764 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2765 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2766 for (i = 0; i < am; i++) { 2767 jl[i] = am; 2768 il[i] = 0; 2769 } 2770 2771 /* create and initialize a linked list for storing column indices of the active row k */ 2772 nlnk = am + 1; 2773 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2774 2775 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2776 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2777 current_space = free_space; 2778 2779 for (k = 0; k < am; k++) { /* for each active row k */ 2780 /* initialize lnk by the column indices of row rip[k] of A */ 2781 nzk = 0; 2782 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2783 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2784 ncols_upper = 0; 2785 for (j = 0; j < ncols; j++) { 2786 i = riip[*(aj + ai[rip[k]] + j)]; 2787 if (i >= k) { /* only take upper triangular entry */ 2788 cols[ncols_upper] = i; 2789 ncols_upper++; 2790 } 2791 } 2792 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2793 nzk += nlnk; 2794 2795 /* update lnk by computing fill-in for each pivot row to be merged in */ 2796 prow = jl[k]; /* 1st pivot row */ 2797 2798 while (prow < k) { 2799 nextprow = jl[prow]; 2800 /* merge prow into k-th row */ 2801 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2802 jmax = ui[prow + 1]; 2803 ncols = jmax - jmin; 2804 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2805 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2806 nzk += nlnk; 2807 2808 /* update il and jl for prow */ 2809 if (jmin < jmax) { 2810 il[prow] = jmin; 2811 j = *uj_ptr; 2812 jl[prow] = jl[j]; 2813 jl[j] = prow; 2814 } 2815 prow = nextprow; 2816 } 2817 2818 /* if free space is not available, make more free space */ 2819 if (current_space->local_remaining < nzk) { 2820 i = am - k + 1; /* num of unfactored rows */ 2821 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2822 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2823 reallocs++; 2824 } 2825 2826 /* copy data into free space, then initialize lnk */ 2827 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2828 2829 /* add the k-th row into il and jl */ 2830 if (nzk > 1) { 2831 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2832 jl[k] = jl[i]; 2833 jl[i] = k; 2834 il[k] = ui[k] + 1; 2835 } 2836 ui_ptr[k] = current_space->array; 2837 2838 current_space->array += nzk; 2839 current_space->local_used += nzk; 2840 current_space->local_remaining -= nzk; 2841 2842 ui[k + 1] = ui[k] + nzk; 2843 } 2844 2845 PetscCall(ISRestoreIndices(perm, &rip)); 2846 PetscCall(ISRestoreIndices(iperm, &riip)); 2847 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2848 2849 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2850 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2851 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2852 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2853 2854 /* put together the new matrix in MATSEQSBAIJ format */ 2855 2856 b = (Mat_SeqSBAIJ *)fact->data; 2857 b->singlemalloc = PETSC_FALSE; 2858 b->free_a = PETSC_TRUE; 2859 b->free_ij = PETSC_TRUE; 2860 2861 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2862 2863 b->j = uj; 2864 b->i = ui; 2865 b->diag = udiag; 2866 b->free_diag = PETSC_TRUE; 2867 b->ilen = NULL; 2868 b->imax = NULL; 2869 b->row = perm; 2870 b->col = perm; 2871 2872 PetscCall(PetscObjectReference((PetscObject)perm)); 2873 PetscCall(PetscObjectReference((PetscObject)perm)); 2874 2875 b->icol = iperm; 2876 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2877 2878 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2879 PetscCall(PetscLogObjectMemory((PetscObject)fact, ui[am] * (sizeof(PetscInt) + sizeof(MatScalar)))); 2880 2881 b->maxnz = b->nz = ui[am]; 2882 2883 fact->info.factor_mallocs = reallocs; 2884 fact->info.fill_ratio_given = fill; 2885 if (ai[am] != 0) { 2886 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2887 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2888 } else { 2889 fact->info.fill_ratio_needed = 0.0; 2890 } 2891 #if defined(PETSC_USE_INFO) 2892 if (ai[am] != 0) { 2893 PetscReal af = fact->info.fill_ratio_needed; 2894 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2895 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2896 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2897 } else { 2898 PetscCall(PetscInfo(A, "Empty matrix\n")); 2899 } 2900 #endif 2901 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2902 PetscFunctionReturn(0); 2903 } 2904 2905 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) { 2906 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2907 Mat_SeqSBAIJ *b; 2908 PetscBool perm_identity, missing; 2909 PetscReal fill = info->fill; 2910 const PetscInt *rip, *riip; 2911 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2912 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2913 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2914 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2915 PetscBT lnkbt; 2916 IS iperm; 2917 2918 PetscFunctionBegin; 2919 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2920 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2921 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2922 2923 /* check whether perm is the identity mapping */ 2924 PetscCall(ISIdentity(perm, &perm_identity)); 2925 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2926 PetscCall(ISGetIndices(iperm, &riip)); 2927 PetscCall(ISGetIndices(perm, &rip)); 2928 2929 /* initialization */ 2930 PetscCall(PetscMalloc1(am + 1, &ui)); 2931 ui[0] = 0; 2932 2933 /* jl: linked list for storing indices of the pivot rows 2934 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2935 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2936 for (i = 0; i < am; i++) { 2937 jl[i] = am; 2938 il[i] = 0; 2939 } 2940 2941 /* create and initialize a linked list for storing column indices of the active row k */ 2942 nlnk = am + 1; 2943 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2944 2945 /* initial FreeSpace size is fill*(ai[am]+1) */ 2946 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2947 current_space = free_space; 2948 2949 for (k = 0; k < am; k++) { /* for each active row k */ 2950 /* initialize lnk by the column indices of row rip[k] of A */ 2951 nzk = 0; 2952 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2953 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2954 ncols_upper = 0; 2955 for (j = 0; j < ncols; j++) { 2956 i = riip[*(aj + ai[rip[k]] + j)]; 2957 if (i >= k) { /* only take upper triangular entry */ 2958 cols[ncols_upper] = i; 2959 ncols_upper++; 2960 } 2961 } 2962 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2963 nzk += nlnk; 2964 2965 /* update lnk by computing fill-in for each pivot row to be merged in */ 2966 prow = jl[k]; /* 1st pivot row */ 2967 2968 while (prow < k) { 2969 nextprow = jl[prow]; 2970 /* merge prow into k-th row */ 2971 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2972 jmax = ui[prow + 1]; 2973 ncols = jmax - jmin; 2974 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2975 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2976 nzk += nlnk; 2977 2978 /* update il and jl for prow */ 2979 if (jmin < jmax) { 2980 il[prow] = jmin; 2981 j = *uj_ptr; 2982 jl[prow] = jl[j]; 2983 jl[j] = prow; 2984 } 2985 prow = nextprow; 2986 } 2987 2988 /* if free space is not available, make more free space */ 2989 if (current_space->local_remaining < nzk) { 2990 i = am - k + 1; /* num of unfactored rows */ 2991 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2992 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2993 reallocs++; 2994 } 2995 2996 /* copy data into free space, then initialize lnk */ 2997 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2998 2999 /* add the k-th row into il and jl */ 3000 if (nzk - 1 > 0) { 3001 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3002 jl[k] = jl[i]; 3003 jl[i] = k; 3004 il[k] = ui[k] + 1; 3005 } 3006 ui_ptr[k] = current_space->array; 3007 3008 current_space->array += nzk; 3009 current_space->local_used += nzk; 3010 current_space->local_remaining -= nzk; 3011 3012 ui[k + 1] = ui[k] + nzk; 3013 } 3014 3015 #if defined(PETSC_USE_INFO) 3016 if (ai[am] != 0) { 3017 PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 3018 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3019 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3020 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3021 } else { 3022 PetscCall(PetscInfo(A, "Empty matrix\n")); 3023 } 3024 #endif 3025 3026 PetscCall(ISRestoreIndices(perm, &rip)); 3027 PetscCall(ISRestoreIndices(iperm, &riip)); 3028 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3029 3030 /* destroy list of free space and other temporary array(s) */ 3031 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 3032 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3033 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3034 3035 /* put together the new matrix in MATSEQSBAIJ format */ 3036 3037 b = (Mat_SeqSBAIJ *)fact->data; 3038 b->singlemalloc = PETSC_FALSE; 3039 b->free_a = PETSC_TRUE; 3040 b->free_ij = PETSC_TRUE; 3041 3042 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 3043 3044 b->j = uj; 3045 b->i = ui; 3046 b->diag = NULL; 3047 b->ilen = NULL; 3048 b->imax = NULL; 3049 b->row = perm; 3050 b->col = perm; 3051 3052 PetscCall(PetscObjectReference((PetscObject)perm)); 3053 PetscCall(PetscObjectReference((PetscObject)perm)); 3054 3055 b->icol = iperm; 3056 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3057 3058 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3059 PetscCall(PetscLogObjectMemory((PetscObject)fact, (ui[am] - am) * (sizeof(PetscInt) + sizeof(MatScalar)))); 3060 b->maxnz = b->nz = ui[am]; 3061 3062 fact->info.factor_mallocs = reallocs; 3063 fact->info.fill_ratio_given = fill; 3064 if (ai[am] != 0) { 3065 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 3066 } else { 3067 fact->info.fill_ratio_needed = 0.0; 3068 } 3069 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3070 PetscFunctionReturn(0); 3071 } 3072 3073 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) { 3074 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3075 PetscInt n = A->rmap->n; 3076 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3077 PetscScalar *x, sum; 3078 const PetscScalar *b; 3079 const MatScalar *aa = a->a, *v; 3080 PetscInt i, nz; 3081 3082 PetscFunctionBegin; 3083 if (!n) PetscFunctionReturn(0); 3084 3085 PetscCall(VecGetArrayRead(bb, &b)); 3086 PetscCall(VecGetArrayWrite(xx, &x)); 3087 3088 /* forward solve the lower triangular */ 3089 x[0] = b[0]; 3090 v = aa; 3091 vi = aj; 3092 for (i = 1; i < n; i++) { 3093 nz = ai[i + 1] - ai[i]; 3094 sum = b[i]; 3095 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3096 v += nz; 3097 vi += nz; 3098 x[i] = sum; 3099 } 3100 3101 /* backward solve the upper triangular */ 3102 for (i = n - 1; i >= 0; i--) { 3103 v = aa + adiag[i + 1] + 1; 3104 vi = aj + adiag[i + 1] + 1; 3105 nz = adiag[i] - adiag[i + 1] - 1; 3106 sum = x[i]; 3107 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3108 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3109 } 3110 3111 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3112 PetscCall(VecRestoreArrayRead(bb, &b)); 3113 PetscCall(VecRestoreArrayWrite(xx, &x)); 3114 PetscFunctionReturn(0); 3115 } 3116 3117 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) { 3118 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3119 IS iscol = a->col, isrow = a->row; 3120 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3121 const PetscInt *rout, *cout, *r, *c; 3122 PetscScalar *x, *tmp, sum; 3123 const PetscScalar *b; 3124 const MatScalar *aa = a->a, *v; 3125 3126 PetscFunctionBegin; 3127 if (!n) PetscFunctionReturn(0); 3128 3129 PetscCall(VecGetArrayRead(bb, &b)); 3130 PetscCall(VecGetArrayWrite(xx, &x)); 3131 tmp = a->solve_work; 3132 3133 PetscCall(ISGetIndices(isrow, &rout)); 3134 r = rout; 3135 PetscCall(ISGetIndices(iscol, &cout)); 3136 c = cout; 3137 3138 /* forward solve the lower triangular */ 3139 tmp[0] = b[r[0]]; 3140 v = aa; 3141 vi = aj; 3142 for (i = 1; i < n; i++) { 3143 nz = ai[i + 1] - ai[i]; 3144 sum = b[r[i]]; 3145 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3146 tmp[i] = sum; 3147 v += nz; 3148 vi += nz; 3149 } 3150 3151 /* backward solve the upper triangular */ 3152 for (i = n - 1; i >= 0; i--) { 3153 v = aa + adiag[i + 1] + 1; 3154 vi = aj + adiag[i + 1] + 1; 3155 nz = adiag[i] - adiag[i + 1] - 1; 3156 sum = tmp[i]; 3157 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3158 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3159 } 3160 3161 PetscCall(ISRestoreIndices(isrow, &rout)); 3162 PetscCall(ISRestoreIndices(iscol, &cout)); 3163 PetscCall(VecRestoreArrayRead(bb, &b)); 3164 PetscCall(VecRestoreArrayWrite(xx, &x)); 3165 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3166 PetscFunctionReturn(0); 3167 } 3168 3169 /* 3170 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3171 */ 3172 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) { 3173 Mat B = *fact; 3174 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3175 IS isicol; 3176 const PetscInt *r, *ic; 3177 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3178 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3179 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3180 PetscInt nlnk, *lnk; 3181 PetscBT lnkbt; 3182 PetscBool row_identity, icol_identity; 3183 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3184 const PetscInt *ics; 3185 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3186 PetscReal dt = info->dt, shift = info->shiftamount; 3187 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3188 PetscBool missing; 3189 3190 PetscFunctionBegin; 3191 if (dt == PETSC_DEFAULT) dt = 0.005; 3192 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3193 3194 /* ------- symbolic factorization, can be reused ---------*/ 3195 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3196 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3197 adiag = a->diag; 3198 3199 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3200 3201 /* bdiag is location of diagonal in factor */ 3202 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3203 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3204 3205 /* allocate row pointers bi */ 3206 PetscCall(PetscMalloc1(2 * n + 2, &bi)); 3207 3208 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3209 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3210 nnz_max = ai[n] + 2 * n * dtcount + 2; 3211 3212 PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 3213 PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3214 3215 /* put together the new matrix */ 3216 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3217 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 3218 b = (Mat_SeqAIJ *)B->data; 3219 3220 b->free_a = PETSC_TRUE; 3221 b->free_ij = PETSC_TRUE; 3222 b->singlemalloc = PETSC_FALSE; 3223 3224 b->a = ba; 3225 b->j = bj; 3226 b->i = bi; 3227 b->diag = bdiag; 3228 b->ilen = NULL; 3229 b->imax = NULL; 3230 b->row = isrow; 3231 b->col = iscol; 3232 PetscCall(PetscObjectReference((PetscObject)isrow)); 3233 PetscCall(PetscObjectReference((PetscObject)iscol)); 3234 b->icol = isicol; 3235 3236 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3237 PetscCall(PetscLogObjectMemory((PetscObject)B, nnz_max * (sizeof(PetscInt) + sizeof(MatScalar)))); 3238 b->maxnz = nnz_max; 3239 3240 B->factortype = MAT_FACTOR_ILUDT; 3241 B->info.factor_mallocs = 0; 3242 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3243 /* ------- end of symbolic factorization ---------*/ 3244 3245 PetscCall(ISGetIndices(isrow, &r)); 3246 PetscCall(ISGetIndices(isicol, &ic)); 3247 ics = ic; 3248 3249 /* linked list for storing column indices of the active row */ 3250 nlnk = n + 1; 3251 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3252 3253 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3254 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3255 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3256 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3257 PetscCall(PetscArrayzero(rtmp, n)); 3258 3259 bi[0] = 0; 3260 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3261 bdiag_rev[n] = bdiag[0]; 3262 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3263 for (i = 0; i < n; i++) { 3264 /* copy initial fill into linked list */ 3265 nzi = ai[r[i] + 1] - ai[r[i]]; 3266 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3267 nzi_al = adiag[r[i]] - ai[r[i]]; 3268 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3269 ajtmp = aj + ai[r[i]]; 3270 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3271 3272 /* load in initial (unfactored row) */ 3273 aatmp = a->a + ai[r[i]]; 3274 for (j = 0; j < nzi; j++) { rtmp[ics[*ajtmp++]] = *aatmp++; } 3275 3276 /* add pivot rows into linked list */ 3277 row = lnk[n]; 3278 while (row < i) { 3279 nzi_bl = bi[row + 1] - bi[row] + 1; 3280 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3281 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3282 nzi += nlnk; 3283 row = lnk[row]; 3284 } 3285 3286 /* copy data from lnk into jtmp, then initialize lnk */ 3287 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3288 3289 /* numerical factorization */ 3290 bjtmp = jtmp; 3291 row = *bjtmp++; /* 1st pivot row */ 3292 while (row < i) { 3293 pc = rtmp + row; 3294 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3295 multiplier = (*pc) * (*pv); 3296 *pc = multiplier; 3297 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3298 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3299 pv = ba + bdiag[row + 1] + 1; 3300 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3301 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3302 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3303 } 3304 row = *bjtmp++; 3305 } 3306 3307 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3308 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3309 nzi_bl = 0; 3310 j = 0; 3311 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3312 vtmp[j] = rtmp[jtmp[j]]; 3313 rtmp[jtmp[j]] = 0.0; 3314 nzi_bl++; 3315 j++; 3316 } 3317 nzi_bu = nzi - nzi_bl - 1; 3318 while (j < nzi) { 3319 vtmp[j] = rtmp[jtmp[j]]; 3320 rtmp[jtmp[j]] = 0.0; 3321 j++; 3322 } 3323 3324 bjtmp = bj + bi[i]; 3325 batmp = ba + bi[i]; 3326 /* apply level dropping rule to L part */ 3327 ncut = nzi_al + dtcount; 3328 if (ncut < nzi_bl) { 3329 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3330 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3331 } else { 3332 ncut = nzi_bl; 3333 } 3334 for (j = 0; j < ncut; j++) { 3335 bjtmp[j] = jtmp[j]; 3336 batmp[j] = vtmp[j]; 3337 } 3338 bi[i + 1] = bi[i] + ncut; 3339 nzi = ncut + 1; 3340 3341 /* apply level dropping rule to U part */ 3342 ncut = nzi_au + dtcount; 3343 if (ncut < nzi_bu) { 3344 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3345 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3346 } else { 3347 ncut = nzi_bu; 3348 } 3349 nzi += ncut; 3350 3351 /* mark bdiagonal */ 3352 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3353 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3354 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3355 bjtmp = bj + bdiag[i]; 3356 batmp = ba + bdiag[i]; 3357 *bjtmp = i; 3358 *batmp = diag_tmp; /* rtmp[i]; */ 3359 if (*batmp == 0.0) { *batmp = dt + shift; } 3360 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3361 3362 bjtmp = bj + bdiag[i + 1] + 1; 3363 batmp = ba + bdiag[i + 1] + 1; 3364 for (k = 0; k < ncut; k++) { 3365 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3366 batmp[k] = vtmp[nzi_bl + 1 + k]; 3367 } 3368 3369 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3370 } /* for (i=0; i<n; i++) */ 3371 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3372 3373 PetscCall(ISRestoreIndices(isrow, &r)); 3374 PetscCall(ISRestoreIndices(isicol, &ic)); 3375 3376 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3377 PetscCall(PetscFree2(im, jtmp)); 3378 PetscCall(PetscFree2(rtmp, vtmp)); 3379 PetscCall(PetscFree(bdiag_rev)); 3380 3381 PetscCall(PetscLogFlops(B->cmap->n)); 3382 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3383 3384 PetscCall(ISIdentity(isrow, &row_identity)); 3385 PetscCall(ISIdentity(isicol, &icol_identity)); 3386 if (row_identity && icol_identity) { 3387 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3388 } else { 3389 B->ops->solve = MatSolve_SeqAIJ; 3390 } 3391 3392 B->ops->solveadd = NULL; 3393 B->ops->solvetranspose = NULL; 3394 B->ops->solvetransposeadd = NULL; 3395 B->ops->matsolve = NULL; 3396 B->assembled = PETSC_TRUE; 3397 B->preallocated = PETSC_TRUE; 3398 PetscFunctionReturn(0); 3399 } 3400 3401 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3402 /* 3403 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3404 */ 3405 3406 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) { 3407 PetscFunctionBegin; 3408 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3409 PetscFunctionReturn(0); 3410 } 3411 3412 /* 3413 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3414 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3415 */ 3416 /* 3417 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3418 */ 3419 3420 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) { 3421 Mat C = fact; 3422 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3423 IS isrow = b->row, isicol = b->icol; 3424 const PetscInt *r, *ic, *ics; 3425 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3426 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3427 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3428 PetscReal dt = info->dt, shift = info->shiftamount; 3429 PetscBool row_identity, col_identity; 3430 3431 PetscFunctionBegin; 3432 PetscCall(ISGetIndices(isrow, &r)); 3433 PetscCall(ISGetIndices(isicol, &ic)); 3434 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3435 ics = ic; 3436 3437 for (i = 0; i < n; i++) { 3438 /* initialize rtmp array */ 3439 nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */ 3440 bjtmp = bj + bi[i]; 3441 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3442 rtmp[i] = 0.0; 3443 nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */ 3444 bjtmp = bj + bdiag[i + 1] + 1; 3445 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3446 3447 /* load in initial unfactored row of A */ 3448 nz = ai[r[i] + 1] - ai[r[i]]; 3449 ajtmp = aj + ai[r[i]]; 3450 v = aa + ai[r[i]]; 3451 for (j = 0; j < nz; j++) { rtmp[ics[*ajtmp++]] = v[j]; } 3452 3453 /* numerical factorization */ 3454 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3455 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3456 k = 0; 3457 while (k < nzl) { 3458 row = *bjtmp++; 3459 pc = rtmp + row; 3460 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3461 multiplier = (*pc) * (*pv); 3462 *pc = multiplier; 3463 if (PetscAbsScalar(multiplier) > dt) { 3464 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3465 pv = b->a + bdiag[row + 1] + 1; 3466 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3467 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3468 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3469 } 3470 k++; 3471 } 3472 3473 /* finished row so stick it into b->a */ 3474 /* L-part */ 3475 pv = b->a + bi[i]; 3476 pj = bj + bi[i]; 3477 nzl = bi[i + 1] - bi[i]; 3478 for (j = 0; j < nzl; j++) { pv[j] = rtmp[pj[j]]; } 3479 3480 /* diagonal: invert diagonal entries for simpler triangular solves */ 3481 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3482 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3483 3484 /* U-part */ 3485 pv = b->a + bdiag[i + 1] + 1; 3486 pj = bj + bdiag[i + 1] + 1; 3487 nzu = bdiag[i] - bdiag[i + 1] - 1; 3488 for (j = 0; j < nzu; j++) { pv[j] = rtmp[pj[j]]; } 3489 } 3490 3491 PetscCall(PetscFree(rtmp)); 3492 PetscCall(ISRestoreIndices(isicol, &ic)); 3493 PetscCall(ISRestoreIndices(isrow, &r)); 3494 3495 PetscCall(ISIdentity(isrow, &row_identity)); 3496 PetscCall(ISIdentity(isicol, &col_identity)); 3497 if (row_identity && col_identity) { 3498 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3499 } else { 3500 C->ops->solve = MatSolve_SeqAIJ; 3501 } 3502 C->ops->solveadd = NULL; 3503 C->ops->solvetranspose = NULL; 3504 C->ops->solvetransposeadd = NULL; 3505 C->ops->matsolve = NULL; 3506 C->assembled = PETSC_TRUE; 3507 C->preallocated = PETSC_TRUE; 3508 3509 PetscCall(PetscLogFlops(C->cmap->n)); 3510 PetscFunctionReturn(0); 3511 } 3512