1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 /* 7 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8 9 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 10 */ 11 #if 0 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best, past = 0, future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i = 0; i < n; i++) { 25 future = 0.0; 26 for (j = ai[i]; j < ai[i + 1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past / future > best) { 32 best = past / future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n, &done)); 38 PetscCall(PetscArrayzero(done, n)); 39 PetscCall(PetscMalloc1(n, &order)); 40 order[0] = current; 41 for (i = 0; i < n - 1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j = ai[current]; j < ai[current + 1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k = ai[jj]; k < ai[jj + 1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past / future > best) { 57 best = past / future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k = 0; k < n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j = ai[k]; j < ai[k + 1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past / future > best) { 74 best = past / future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 80 current = newcurrent; 81 order[i + 1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 #endif 91 92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93 { 94 PetscFunctionBegin; 95 *type = MATSOLVERPETSC; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100 { 101 PetscInt n = A->rmap->n; 102 103 PetscFunctionBegin; 104 #if defined(PETSC_USE_COMPLEX) 105 if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 106 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 107 *B = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 #endif 111 112 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 113 PetscCall(MatSetSizes(*B, n, n, n, n)); 114 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 115 PetscCall(MatSetType(*B, MATSEQAIJ)); 116 117 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 118 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 119 120 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 121 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 122 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 123 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 124 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 125 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 126 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 127 128 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 129 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 130 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 131 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 132 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 133 (*B)->factortype = ftype; 134 135 PetscCall(PetscFree((*B)->solvertype)); 136 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 137 (*B)->canuseordering = PETSC_TRUE; 138 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 #if 0 143 // currently unused 144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 145 { 146 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 147 IS isicol; 148 const PetscInt *r, *ic; 149 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 150 PetscInt *bi, *bj, *ajtmp; 151 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 152 PetscReal f; 153 PetscInt nlnk, *lnk, k, **bi_ptr; 154 PetscFreeSpaceList free_space = NULL, current_space = NULL; 155 PetscBT lnkbt; 156 PetscBool missing; 157 158 PetscFunctionBegin; 159 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 160 PetscCall(MatMissingDiagonal(A, &missing, &i)); 161 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 162 163 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 164 PetscCall(ISGetIndices(isrow, &r)); 165 PetscCall(ISGetIndices(isicol, &ic)); 166 167 /* get new row pointers */ 168 PetscCall(PetscMalloc1(n + 1, &bi)); 169 bi[0] = 0; 170 171 /* bdiag is location of diagonal in factor */ 172 PetscCall(PetscMalloc1(n + 1, &bdiag)); 173 bdiag[0] = 0; 174 175 /* linked list for storing column indices of the active row */ 176 nlnk = n + 1; 177 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 178 179 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 180 181 /* initial FreeSpace size is f*(ai[n]+1) */ 182 f = info->fill; 183 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 184 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 185 current_space = free_space; 186 187 for (i = 0; i < n; i++) { 188 /* copy previous fill into linked list */ 189 nzi = 0; 190 nnz = ai[r[i] + 1] - ai[r[i]]; 191 ajtmp = aj + ai[r[i]]; 192 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 193 nzi += nlnk; 194 195 /* add pivot rows into linked list */ 196 row = lnk[n]; 197 while (row < i) { 198 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 199 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 200 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 201 nzi += nlnk; 202 row = lnk[row]; 203 } 204 bi[i + 1] = bi[i] + nzi; 205 im[i] = nzi; 206 207 /* mark bdiag */ 208 nzbd = 0; 209 nnz = nzi; 210 k = lnk[n]; 211 while (nnz-- && k < i) { 212 nzbd++; 213 k = lnk[k]; 214 } 215 bdiag[i] = bi[i] + nzbd; 216 217 /* if free space is not available, make more free space */ 218 if (current_space->local_remaining < nzi) { 219 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 220 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 221 reallocs++; 222 } 223 224 /* copy data into free space, then initialize lnk */ 225 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 226 227 bi_ptr[i] = current_space->array; 228 current_space->array += nzi; 229 current_space->local_used += nzi; 230 current_space->local_remaining -= nzi; 231 } 232 #if defined(PETSC_USE_INFO) 233 if (ai[n] != 0) { 234 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 235 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 236 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 237 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 238 PetscCall(PetscInfo(A, "for best performance.\n")); 239 } else { 240 PetscCall(PetscInfo(A, "Empty matrix\n")); 241 } 242 #endif 243 244 PetscCall(ISRestoreIndices(isrow, &r)); 245 PetscCall(ISRestoreIndices(isicol, &ic)); 246 247 /* destroy list of free space and other temporary array(s) */ 248 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 249 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 250 PetscCall(PetscLLDestroy(lnk, lnkbt)); 251 PetscCall(PetscFree2(bi_ptr, im)); 252 253 /* put together the new matrix */ 254 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 255 b = (Mat_SeqAIJ *)(B)->data; 256 257 b->free_a = PETSC_TRUE; 258 b->free_ij = PETSC_TRUE; 259 b->singlemalloc = PETSC_FALSE; 260 261 PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 262 b->j = bj; 263 b->i = bi; 264 b->diag = bdiag; 265 b->ilen = NULL; 266 b->imax = NULL; 267 b->row = isrow; 268 b->col = iscol; 269 PetscCall(PetscObjectReference((PetscObject)isrow)); 270 PetscCall(PetscObjectReference((PetscObject)iscol)); 271 b->icol = isicol; 272 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 273 274 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 275 b->maxnz = b->nz = bi[n]; 276 277 (B)->factortype = MAT_FACTOR_LU; 278 (B)->info.factor_mallocs = reallocs; 279 (B)->info.fill_ratio_given = f; 280 281 if (ai[n]) { 282 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 283 } else { 284 (B)->info.fill_ratio_needed = 0.0; 285 } 286 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 287 if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 288 PetscFunctionReturn(PETSC_SUCCESS); 289 } 290 #endif 291 292 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 293 { 294 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 295 IS isicol; 296 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 297 PetscInt i, n = A->rmap->n; 298 PetscInt *bi, *bj; 299 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 300 PetscReal f; 301 PetscInt nlnk, *lnk, k, **bi_ptr; 302 PetscFreeSpaceList free_space = NULL, current_space = NULL; 303 PetscBT lnkbt; 304 PetscBool missing; 305 306 PetscFunctionBegin; 307 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 308 PetscCall(MatMissingDiagonal(A, &missing, &i)); 309 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 310 311 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 312 PetscCall(ISGetIndices(isrow, &r)); 313 PetscCall(ISGetIndices(isicol, &ic)); 314 315 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 316 PetscCall(PetscMalloc1(n + 1, &bi)); 317 PetscCall(PetscMalloc1(n + 1, &bdiag)); 318 bi[0] = bdiag[0] = 0; 319 320 /* linked list for storing column indices of the active row */ 321 nlnk = n + 1; 322 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 323 324 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 325 326 /* initial FreeSpace size is f*(ai[n]+1) */ 327 f = info->fill; 328 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 329 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 330 current_space = free_space; 331 332 for (i = 0; i < n; i++) { 333 /* copy previous fill into linked list */ 334 nzi = 0; 335 nnz = ai[r[i] + 1] - ai[r[i]]; 336 ajtmp = aj + ai[r[i]]; 337 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 338 nzi += nlnk; 339 340 /* add pivot rows into linked list */ 341 row = lnk[n]; 342 while (row < i) { 343 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 344 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 345 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 346 nzi += nlnk; 347 row = lnk[row]; 348 } 349 bi[i + 1] = bi[i] + nzi; 350 im[i] = nzi; 351 352 /* mark bdiag */ 353 nzbd = 0; 354 nnz = nzi; 355 k = lnk[n]; 356 while (nnz-- && k < i) { 357 nzbd++; 358 k = lnk[k]; 359 } 360 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 361 362 /* if free space is not available, make more free space */ 363 if (current_space->local_remaining < nzi) { 364 /* estimated additional space needed */ 365 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 366 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 367 reallocs++; 368 } 369 370 /* copy data into free space, then initialize lnk */ 371 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 372 373 bi_ptr[i] = current_space->array; 374 current_space->array += nzi; 375 current_space->local_used += nzi; 376 current_space->local_remaining -= nzi; 377 } 378 379 PetscCall(ISRestoreIndices(isrow, &r)); 380 PetscCall(ISRestoreIndices(isicol, &ic)); 381 382 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 383 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 384 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 385 PetscCall(PetscLLDestroy(lnk, lnkbt)); 386 PetscCall(PetscFree2(bi_ptr, im)); 387 388 /* put together the new matrix */ 389 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 390 b = (Mat_SeqAIJ *)(B)->data; 391 392 b->free_a = PETSC_TRUE; 393 b->free_ij = PETSC_TRUE; 394 b->singlemalloc = PETSC_FALSE; 395 396 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 397 398 b->j = bj; 399 b->i = bi; 400 b->diag = bdiag; 401 b->ilen = NULL; 402 b->imax = NULL; 403 b->row = isrow; 404 b->col = iscol; 405 PetscCall(PetscObjectReference((PetscObject)isrow)); 406 PetscCall(PetscObjectReference((PetscObject)iscol)); 407 b->icol = isicol; 408 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 409 410 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 411 b->maxnz = b->nz = bdiag[0] + 1; 412 413 B->factortype = MAT_FACTOR_LU; 414 B->info.factor_mallocs = reallocs; 415 B->info.fill_ratio_given = f; 416 417 if (ai[n]) { 418 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 419 } else { 420 B->info.fill_ratio_needed = 0.0; 421 } 422 #if defined(PETSC_USE_INFO) 423 if (ai[n] != 0) { 424 PetscReal af = B->info.fill_ratio_needed; 425 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 426 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 427 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 428 PetscCall(PetscInfo(A, "for best performance.\n")); 429 } else { 430 PetscCall(PetscInfo(A, "Empty matrix\n")); 431 } 432 #endif 433 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 434 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 435 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 436 PetscFunctionReturn(PETSC_SUCCESS); 437 } 438 439 /* 440 Trouble in factorization, should we dump the original matrix? 441 */ 442 PetscErrorCode MatFactorDumpMatrix(Mat A) 443 { 444 PetscBool flg = PETSC_FALSE; 445 446 PetscFunctionBegin; 447 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 448 if (flg) { 449 PetscViewer viewer; 450 char filename[PETSC_MAX_PATH_LEN]; 451 452 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 453 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 454 PetscCall(MatView(A, viewer)); 455 PetscCall(PetscViewerDestroy(&viewer)); 456 } 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 461 { 462 Mat C = B; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 464 IS isrow = b->row, isicol = b->icol; 465 const PetscInt *r, *ic, *ics; 466 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 467 PetscInt i, j, k, nz, nzL, row, *pj; 468 const PetscInt *ajtmp, *bjtmp; 469 MatScalar *rtmp, *pc, multiplier, *pv; 470 const MatScalar *aa = a->a, *v; 471 PetscBool row_identity, col_identity; 472 FactorShiftCtx sctx; 473 const PetscInt *ddiag; 474 PetscReal rs; 475 MatScalar d; 476 477 PetscFunctionBegin; 478 /* MatPivotSetUp(): initialize shift context sctx */ 479 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 480 481 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 482 ddiag = a->diag; 483 sctx.shift_top = info->zeropivot; 484 for (i = 0; i < n; i++) { 485 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 486 d = (aa)[ddiag[i]]; 487 rs = -PetscAbsScalar(d) - PetscRealPart(d); 488 v = aa + ai[i]; 489 nz = ai[i + 1] - ai[i]; 490 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 491 if (rs > sctx.shift_top) sctx.shift_top = rs; 492 } 493 sctx.shift_top *= 1.1; 494 sctx.nshift_max = 5; 495 sctx.shift_lo = 0.; 496 sctx.shift_hi = 1.; 497 } 498 499 PetscCall(ISGetIndices(isrow, &r)); 500 PetscCall(ISGetIndices(isicol, &ic)); 501 PetscCall(PetscMalloc1(n + 1, &rtmp)); 502 ics = ic; 503 504 do { 505 sctx.newshift = PETSC_FALSE; 506 for (i = 0; i < n; i++) { 507 /* zero rtmp */ 508 /* L part */ 509 nz = bi[i + 1] - bi[i]; 510 bjtmp = bj + bi[i]; 511 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 512 513 /* U part */ 514 nz = bdiag[i] - bdiag[i + 1]; 515 bjtmp = bj + bdiag[i + 1] + 1; 516 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 517 518 /* load in initial (unfactored row) */ 519 nz = ai[r[i] + 1] - ai[r[i]]; 520 ajtmp = aj + ai[r[i]]; 521 v = aa + ai[r[i]]; 522 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 523 /* ZeropivotApply() */ 524 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 525 526 /* elimination */ 527 bjtmp = bj + bi[i]; 528 row = *bjtmp++; 529 nzL = bi[i + 1] - bi[i]; 530 for (k = 0; k < nzL; k++) { 531 pc = rtmp + row; 532 if (*pc != 0.0) { 533 pv = b->a + bdiag[row]; 534 multiplier = *pc * (*pv); 535 *pc = multiplier; 536 537 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 538 pv = b->a + bdiag[row + 1] + 1; 539 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 540 541 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 542 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 543 } 544 row = *bjtmp++; 545 } 546 547 /* finished row so stick it into b->a */ 548 rs = 0.0; 549 /* L part */ 550 pv = b->a + bi[i]; 551 pj = b->j + bi[i]; 552 nz = bi[i + 1] - bi[i]; 553 for (j = 0; j < nz; j++) { 554 pv[j] = rtmp[pj[j]]; 555 rs += PetscAbsScalar(pv[j]); 556 } 557 558 /* U part */ 559 pv = b->a + bdiag[i + 1] + 1; 560 pj = b->j + bdiag[i + 1] + 1; 561 nz = bdiag[i] - bdiag[i + 1] - 1; 562 for (j = 0; j < nz; j++) { 563 pv[j] = rtmp[pj[j]]; 564 rs += PetscAbsScalar(pv[j]); 565 } 566 567 sctx.rs = rs; 568 sctx.pv = rtmp[i]; 569 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 570 if (sctx.newshift) break; /* break for-loop */ 571 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 572 573 /* Mark diagonal and invert diagonal for simpler triangular solves */ 574 pv = b->a + bdiag[i]; 575 *pv = 1.0 / rtmp[i]; 576 577 } /* endof for (i=0; i<n; i++) { */ 578 579 /* MatPivotRefine() */ 580 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 581 /* 582 * if no shift in this attempt & shifting & started shifting & can refine, 583 * then try lower shift 584 */ 585 sctx.shift_hi = sctx.shift_fraction; 586 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 587 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 588 sctx.newshift = PETSC_TRUE; 589 sctx.nshift++; 590 } 591 } while (sctx.newshift); 592 593 PetscCall(PetscFree(rtmp)); 594 PetscCall(ISRestoreIndices(isicol, &ic)); 595 PetscCall(ISRestoreIndices(isrow, &r)); 596 597 PetscCall(ISIdentity(isrow, &row_identity)); 598 PetscCall(ISIdentity(isicol, &col_identity)); 599 if (b->inode.size) { 600 C->ops->solve = MatSolve_SeqAIJ_Inode; 601 } else if (row_identity && col_identity) { 602 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 603 } else { 604 C->ops->solve = MatSolve_SeqAIJ; 605 } 606 C->ops->solveadd = MatSolveAdd_SeqAIJ; 607 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 608 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 609 C->ops->matsolve = MatMatSolve_SeqAIJ; 610 C->assembled = PETSC_TRUE; 611 C->preallocated = PETSC_TRUE; 612 613 PetscCall(PetscLogFlops(C->cmap->n)); 614 615 /* MatShiftView(A,info,&sctx) */ 616 if (sctx.nshift) { 617 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 618 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 619 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 620 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 621 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 622 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 623 } 624 } 625 PetscFunctionReturn(PETSC_SUCCESS); 626 } 627 628 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 629 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 630 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 631 632 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 633 { 634 Mat C = B; 635 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 636 IS isrow = b->row, isicol = b->icol; 637 const PetscInt *r, *ic, *ics; 638 PetscInt nz, row, i, j, n = A->rmap->n, diag; 639 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 640 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 641 MatScalar *pv, *rtmp, *pc, multiplier, d; 642 const MatScalar *v, *aa = a->a; 643 PetscReal rs = 0.0; 644 FactorShiftCtx sctx; 645 const PetscInt *ddiag; 646 PetscBool row_identity, col_identity; 647 648 PetscFunctionBegin; 649 /* MatPivotSetUp(): initialize shift context sctx */ 650 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 651 652 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 653 ddiag = a->diag; 654 sctx.shift_top = info->zeropivot; 655 for (i = 0; i < n; i++) { 656 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 657 d = (aa)[ddiag[i]]; 658 rs = -PetscAbsScalar(d) - PetscRealPart(d); 659 v = aa + ai[i]; 660 nz = ai[i + 1] - ai[i]; 661 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 662 if (rs > sctx.shift_top) sctx.shift_top = rs; 663 } 664 sctx.shift_top *= 1.1; 665 sctx.nshift_max = 5; 666 sctx.shift_lo = 0.; 667 sctx.shift_hi = 1.; 668 } 669 670 PetscCall(ISGetIndices(isrow, &r)); 671 PetscCall(ISGetIndices(isicol, &ic)); 672 PetscCall(PetscMalloc1(n + 1, &rtmp)); 673 ics = ic; 674 675 do { 676 sctx.newshift = PETSC_FALSE; 677 for (i = 0; i < n; i++) { 678 nz = bi[i + 1] - bi[i]; 679 bjtmp = bj + bi[i]; 680 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 681 682 /* load in initial (unfactored row) */ 683 nz = ai[r[i] + 1] - ai[r[i]]; 684 ajtmp = aj + ai[r[i]]; 685 v = aa + ai[r[i]]; 686 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 687 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 688 689 row = *bjtmp++; 690 while (row < i) { 691 pc = rtmp + row; 692 if (*pc != 0.0) { 693 pv = b->a + diag_offset[row]; 694 pj = b->j + diag_offset[row] + 1; 695 multiplier = *pc / *pv++; 696 *pc = multiplier; 697 nz = bi[row + 1] - diag_offset[row] - 1; 698 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 699 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 700 } 701 row = *bjtmp++; 702 } 703 /* finished row so stick it into b->a */ 704 pv = b->a + bi[i]; 705 pj = b->j + bi[i]; 706 nz = bi[i + 1] - bi[i]; 707 diag = diag_offset[i] - bi[i]; 708 rs = 0.0; 709 for (j = 0; j < nz; j++) { 710 pv[j] = rtmp[pj[j]]; 711 rs += PetscAbsScalar(pv[j]); 712 } 713 rs -= PetscAbsScalar(pv[diag]); 714 715 sctx.rs = rs; 716 sctx.pv = pv[diag]; 717 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 718 if (sctx.newshift) break; 719 pv[diag] = sctx.pv; 720 } 721 722 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 723 /* 724 * if no shift in this attempt & shifting & started shifting & can refine, 725 * then try lower shift 726 */ 727 sctx.shift_hi = sctx.shift_fraction; 728 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 729 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 730 sctx.newshift = PETSC_TRUE; 731 sctx.nshift++; 732 } 733 } while (sctx.newshift); 734 735 /* invert diagonal entries for simpler triangular solves */ 736 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 737 PetscCall(PetscFree(rtmp)); 738 PetscCall(ISRestoreIndices(isicol, &ic)); 739 PetscCall(ISRestoreIndices(isrow, &r)); 740 741 PetscCall(ISIdentity(isrow, &row_identity)); 742 PetscCall(ISIdentity(isicol, &col_identity)); 743 if (row_identity && col_identity) { 744 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 745 } else { 746 C->ops->solve = MatSolve_SeqAIJ_inplace; 747 } 748 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 749 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 750 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 751 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 752 753 C->assembled = PETSC_TRUE; 754 C->preallocated = PETSC_TRUE; 755 756 PetscCall(PetscLogFlops(C->cmap->n)); 757 if (sctx.nshift) { 758 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 759 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 760 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 761 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 762 } 763 } 764 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 765 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 766 767 PetscCall(MatSeqAIJCheckInode(C)); 768 PetscFunctionReturn(PETSC_SUCCESS); 769 } 770 771 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 772 773 /* 774 This routine implements inplace ILU(0) with row or/and column permutations. 775 Input: 776 A - original matrix 777 Output; 778 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 779 a->j (col index) is permuted by the inverse of colperm, then sorted 780 a->a reordered accordingly with a->j 781 a->diag (ptr to diagonal elements) is updated. 782 */ 783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 784 { 785 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 786 IS isrow = a->row, isicol = a->icol; 787 const PetscInt *r, *ic, *ics; 788 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 789 PetscInt *ajtmp, nz, row; 790 PetscInt *diag = a->diag, nbdiag, *pj; 791 PetscScalar *rtmp, *pc, multiplier, d; 792 MatScalar *pv, *v; 793 PetscReal rs; 794 FactorShiftCtx sctx; 795 const MatScalar *aa = a->a, *vtmp; 796 797 PetscFunctionBegin; 798 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 799 800 /* MatPivotSetUp(): initialize shift context sctx */ 801 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 802 803 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 804 const PetscInt *ddiag = a->diag; 805 sctx.shift_top = info->zeropivot; 806 for (i = 0; i < n; i++) { 807 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 808 d = (aa)[ddiag[i]]; 809 rs = -PetscAbsScalar(d) - PetscRealPart(d); 810 vtmp = aa + ai[i]; 811 nz = ai[i + 1] - ai[i]; 812 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 813 if (rs > sctx.shift_top) sctx.shift_top = rs; 814 } 815 sctx.shift_top *= 1.1; 816 sctx.nshift_max = 5; 817 sctx.shift_lo = 0.; 818 sctx.shift_hi = 1.; 819 } 820 821 PetscCall(ISGetIndices(isrow, &r)); 822 PetscCall(ISGetIndices(isicol, &ic)); 823 PetscCall(PetscMalloc1(n + 1, &rtmp)); 824 PetscCall(PetscArrayzero(rtmp, n + 1)); 825 ics = ic; 826 827 #if defined(MV) 828 sctx.shift_top = 0.; 829 sctx.nshift_max = 0; 830 sctx.shift_lo = 0.; 831 sctx.shift_hi = 0.; 832 sctx.shift_fraction = 0.; 833 834 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 835 sctx.shift_top = 0.; 836 for (i = 0; i < n; i++) { 837 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 838 d = (a->a)[diag[i]]; 839 rs = -PetscAbsScalar(d) - PetscRealPart(d); 840 v = a->a + ai[i]; 841 nz = ai[i + 1] - ai[i]; 842 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 843 if (rs > sctx.shift_top) sctx.shift_top = rs; 844 } 845 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 846 sctx.shift_top *= 1.1; 847 sctx.nshift_max = 5; 848 sctx.shift_lo = 0.; 849 sctx.shift_hi = 1.; 850 } 851 852 sctx.shift_amount = 0.; 853 sctx.nshift = 0; 854 #endif 855 856 do { 857 sctx.newshift = PETSC_FALSE; 858 for (i = 0; i < n; i++) { 859 /* load in initial unfactored row */ 860 nz = ai[r[i] + 1] - ai[r[i]]; 861 ajtmp = aj + ai[r[i]]; 862 v = a->a + ai[r[i]]; 863 /* sort permuted ajtmp and values v accordingly */ 864 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 865 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 866 867 diag[r[i]] = ai[r[i]]; 868 for (j = 0; j < nz; j++) { 869 rtmp[ajtmp[j]] = v[j]; 870 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 871 } 872 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 873 874 row = *ajtmp++; 875 while (row < i) { 876 pc = rtmp + row; 877 if (*pc != 0.0) { 878 pv = a->a + diag[r[row]]; 879 pj = aj + diag[r[row]] + 1; 880 881 multiplier = *pc / *pv++; 882 *pc = multiplier; 883 nz = ai[r[row] + 1] - diag[r[row]] - 1; 884 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 885 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 886 } 887 row = *ajtmp++; 888 } 889 /* finished row so overwrite it onto a->a */ 890 pv = a->a + ai[r[i]]; 891 pj = aj + ai[r[i]]; 892 nz = ai[r[i] + 1] - ai[r[i]]; 893 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 894 895 rs = 0.0; 896 for (j = 0; j < nz; j++) { 897 pv[j] = rtmp[pj[j]]; 898 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 899 } 900 901 sctx.rs = rs; 902 sctx.pv = pv[nbdiag]; 903 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 904 if (sctx.newshift) break; 905 pv[nbdiag] = sctx.pv; 906 } 907 908 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 909 /* 910 * if no shift in this attempt & shifting & started shifting & can refine, 911 * then try lower shift 912 */ 913 sctx.shift_hi = sctx.shift_fraction; 914 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 915 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 916 sctx.newshift = PETSC_TRUE; 917 sctx.nshift++; 918 } 919 } while (sctx.newshift); 920 921 /* invert diagonal entries for simpler triangular solves */ 922 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 923 924 PetscCall(PetscFree(rtmp)); 925 PetscCall(ISRestoreIndices(isicol, &ic)); 926 PetscCall(ISRestoreIndices(isrow, &r)); 927 928 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 929 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 930 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 931 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 932 933 A->assembled = PETSC_TRUE; 934 A->preallocated = PETSC_TRUE; 935 936 PetscCall(PetscLogFlops(A->cmap->n)); 937 if (sctx.nshift) { 938 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 939 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 940 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 941 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 942 } 943 } 944 PetscFunctionReturn(PETSC_SUCCESS); 945 } 946 947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 948 { 949 Mat C; 950 951 PetscFunctionBegin; 952 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 953 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 954 PetscCall(MatLUFactorNumeric(C, A, info)); 955 956 A->ops->solve = C->ops->solve; 957 A->ops->solvetranspose = C->ops->solvetranspose; 958 959 PetscCall(MatHeaderMerge(A, &C)); 960 PetscFunctionReturn(PETSC_SUCCESS); 961 } 962 963 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 964 { 965 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 966 IS iscol = a->col, isrow = a->row; 967 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 968 PetscInt nz; 969 const PetscInt *rout, *cout, *r, *c; 970 PetscScalar *x, *tmp, *tmps, sum; 971 const PetscScalar *b; 972 const MatScalar *aa = a->a, *v; 973 974 PetscFunctionBegin; 975 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 976 977 PetscCall(VecGetArrayRead(bb, &b)); 978 PetscCall(VecGetArrayWrite(xx, &x)); 979 tmp = a->solve_work; 980 981 PetscCall(ISGetIndices(isrow, &rout)); 982 r = rout; 983 PetscCall(ISGetIndices(iscol, &cout)); 984 c = cout + (n - 1); 985 986 /* forward solve the lower triangular */ 987 tmp[0] = b[*r++]; 988 tmps = tmp; 989 for (i = 1; i < n; i++) { 990 v = aa + ai[i]; 991 vi = aj + ai[i]; 992 nz = a->diag[i] - ai[i]; 993 sum = b[*r++]; 994 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 995 tmp[i] = sum; 996 } 997 998 /* backward solve the upper triangular */ 999 for (i = n - 1; i >= 0; i--) { 1000 v = aa + a->diag[i] + 1; 1001 vi = aj + a->diag[i] + 1; 1002 nz = ai[i + 1] - a->diag[i] - 1; 1003 sum = tmp[i]; 1004 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1005 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 1006 } 1007 1008 PetscCall(ISRestoreIndices(isrow, &rout)); 1009 PetscCall(ISRestoreIndices(iscol, &cout)); 1010 PetscCall(VecRestoreArrayRead(bb, &b)); 1011 PetscCall(VecRestoreArrayWrite(xx, &x)); 1012 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1017 { 1018 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1019 IS iscol = a->col, isrow = a->row; 1020 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1021 PetscInt nz, neq, ldb, ldx; 1022 const PetscInt *rout, *cout, *r, *c; 1023 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1024 const PetscScalar *b, *aa = a->a, *v; 1025 PetscBool isdense; 1026 1027 PetscFunctionBegin; 1028 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1029 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1030 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1031 if (X != B) { 1032 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1033 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1034 } 1035 PetscCall(MatDenseGetArrayRead(B, &b)); 1036 PetscCall(MatDenseGetLDA(B, &ldb)); 1037 PetscCall(MatDenseGetArray(X, &x)); 1038 PetscCall(MatDenseGetLDA(X, &ldx)); 1039 PetscCall(ISGetIndices(isrow, &rout)); 1040 r = rout; 1041 PetscCall(ISGetIndices(iscol, &cout)); 1042 c = cout; 1043 for (neq = 0; neq < B->cmap->n; neq++) { 1044 /* forward solve the lower triangular */ 1045 tmp[0] = b[r[0]]; 1046 tmps = tmp; 1047 for (i = 1; i < n; i++) { 1048 v = aa + ai[i]; 1049 vi = aj + ai[i]; 1050 nz = a->diag[i] - ai[i]; 1051 sum = b[r[i]]; 1052 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1053 tmp[i] = sum; 1054 } 1055 /* backward solve the upper triangular */ 1056 for (i = n - 1; i >= 0; i--) { 1057 v = aa + a->diag[i] + 1; 1058 vi = aj + a->diag[i] + 1; 1059 nz = ai[i + 1] - a->diag[i] - 1; 1060 sum = tmp[i]; 1061 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1062 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1063 } 1064 b += ldb; 1065 x += ldx; 1066 } 1067 PetscCall(ISRestoreIndices(isrow, &rout)); 1068 PetscCall(ISRestoreIndices(iscol, &cout)); 1069 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1070 PetscCall(MatDenseRestoreArray(X, &x)); 1071 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1072 PetscFunctionReturn(PETSC_SUCCESS); 1073 } 1074 1075 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1076 { 1077 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1078 IS iscol = a->col, isrow = a->row; 1079 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1080 PetscInt nz, neq, ldb, ldx; 1081 const PetscInt *rout, *cout, *r, *c; 1082 PetscScalar *x, *tmp = a->solve_work, sum; 1083 const PetscScalar *b, *aa = a->a, *v; 1084 PetscBool isdense; 1085 1086 PetscFunctionBegin; 1087 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1088 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1089 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1090 if (X != B) { 1091 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1092 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1093 } 1094 PetscCall(MatDenseGetArrayRead(B, &b)); 1095 PetscCall(MatDenseGetLDA(B, &ldb)); 1096 PetscCall(MatDenseGetArray(X, &x)); 1097 PetscCall(MatDenseGetLDA(X, &ldx)); 1098 PetscCall(ISGetIndices(isrow, &rout)); 1099 r = rout; 1100 PetscCall(ISGetIndices(iscol, &cout)); 1101 c = cout; 1102 for (neq = 0; neq < B->cmap->n; neq++) { 1103 /* forward solve the lower triangular */ 1104 tmp[0] = b[r[0]]; 1105 v = aa; 1106 vi = aj; 1107 for (i = 1; i < n; i++) { 1108 nz = ai[i + 1] - ai[i]; 1109 sum = b[r[i]]; 1110 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1111 tmp[i] = sum; 1112 v += nz; 1113 vi += nz; 1114 } 1115 /* backward solve the upper triangular */ 1116 for (i = n - 1; i >= 0; i--) { 1117 v = aa + adiag[i + 1] + 1; 1118 vi = aj + adiag[i + 1] + 1; 1119 nz = adiag[i] - adiag[i + 1] - 1; 1120 sum = tmp[i]; 1121 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1122 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1123 } 1124 b += ldb; 1125 x += ldx; 1126 } 1127 PetscCall(ISRestoreIndices(isrow, &rout)); 1128 PetscCall(ISRestoreIndices(iscol, &cout)); 1129 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1130 PetscCall(MatDenseRestoreArray(X, &x)); 1131 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1136 { 1137 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1138 IS iscol = a->col, isrow = a->row; 1139 const PetscInt *r, *c, *rout, *cout; 1140 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1141 PetscInt nz, row; 1142 PetscScalar *x, *tmp, *tmps, sum; 1143 const PetscScalar *b; 1144 const MatScalar *aa = a->a, *v; 1145 1146 PetscFunctionBegin; 1147 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1148 1149 PetscCall(VecGetArrayRead(bb, &b)); 1150 PetscCall(VecGetArrayWrite(xx, &x)); 1151 tmp = a->solve_work; 1152 1153 PetscCall(ISGetIndices(isrow, &rout)); 1154 r = rout; 1155 PetscCall(ISGetIndices(iscol, &cout)); 1156 c = cout + (n - 1); 1157 1158 /* forward solve the lower triangular */ 1159 tmp[0] = b[*r++]; 1160 tmps = tmp; 1161 for (row = 1; row < n; row++) { 1162 i = rout[row]; /* permuted row */ 1163 v = aa + ai[i]; 1164 vi = aj + ai[i]; 1165 nz = a->diag[i] - ai[i]; 1166 sum = b[*r++]; 1167 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1168 tmp[row] = sum; 1169 } 1170 1171 /* backward solve the upper triangular */ 1172 for (row = n - 1; row >= 0; row--) { 1173 i = rout[row]; /* permuted row */ 1174 v = aa + a->diag[i] + 1; 1175 vi = aj + a->diag[i] + 1; 1176 nz = ai[i + 1] - a->diag[i] - 1; 1177 sum = tmp[row]; 1178 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1179 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1180 } 1181 1182 PetscCall(ISRestoreIndices(isrow, &rout)); 1183 PetscCall(ISRestoreIndices(iscol, &cout)); 1184 PetscCall(VecRestoreArrayRead(bb, &b)); 1185 PetscCall(VecRestoreArrayWrite(xx, &x)); 1186 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1187 PetscFunctionReturn(PETSC_SUCCESS); 1188 } 1189 1190 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1191 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1192 { 1193 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1194 PetscInt n = A->rmap->n; 1195 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1196 PetscScalar *x; 1197 const PetscScalar *b; 1198 const MatScalar *aa = a->a; 1199 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1200 PetscInt adiag_i, i, nz, ai_i; 1201 const PetscInt *vi; 1202 const MatScalar *v; 1203 PetscScalar sum; 1204 #endif 1205 1206 PetscFunctionBegin; 1207 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1208 1209 PetscCall(VecGetArrayRead(bb, &b)); 1210 PetscCall(VecGetArrayWrite(xx, &x)); 1211 1212 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1213 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1214 #else 1215 /* forward solve the lower triangular */ 1216 x[0] = b[0]; 1217 for (i = 1; i < n; i++) { 1218 ai_i = ai[i]; 1219 v = aa + ai_i; 1220 vi = aj + ai_i; 1221 nz = adiag[i] - ai_i; 1222 sum = b[i]; 1223 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1224 x[i] = sum; 1225 } 1226 1227 /* backward solve the upper triangular */ 1228 for (i = n - 1; i >= 0; i--) { 1229 adiag_i = adiag[i]; 1230 v = aa + adiag_i + 1; 1231 vi = aj + adiag_i + 1; 1232 nz = ai[i + 1] - adiag_i - 1; 1233 sum = x[i]; 1234 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1235 x[i] = sum * aa[adiag_i]; 1236 } 1237 #endif 1238 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1239 PetscCall(VecRestoreArrayRead(bb, &b)); 1240 PetscCall(VecRestoreArrayWrite(xx, &x)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1245 { 1246 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1247 IS iscol = a->col, isrow = a->row; 1248 PetscInt i, n = A->rmap->n, j; 1249 PetscInt nz; 1250 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1251 PetscScalar *x, *tmp, sum; 1252 const PetscScalar *b; 1253 const MatScalar *aa = a->a, *v; 1254 1255 PetscFunctionBegin; 1256 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1257 1258 PetscCall(VecGetArrayRead(bb, &b)); 1259 PetscCall(VecGetArray(xx, &x)); 1260 tmp = a->solve_work; 1261 1262 PetscCall(ISGetIndices(isrow, &rout)); 1263 r = rout; 1264 PetscCall(ISGetIndices(iscol, &cout)); 1265 c = cout + (n - 1); 1266 1267 /* forward solve the lower triangular */ 1268 tmp[0] = b[*r++]; 1269 for (i = 1; i < n; i++) { 1270 v = aa + ai[i]; 1271 vi = aj + ai[i]; 1272 nz = a->diag[i] - ai[i]; 1273 sum = b[*r++]; 1274 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1275 tmp[i] = sum; 1276 } 1277 1278 /* backward solve the upper triangular */ 1279 for (i = n - 1; i >= 0; i--) { 1280 v = aa + a->diag[i] + 1; 1281 vi = aj + a->diag[i] + 1; 1282 nz = ai[i + 1] - a->diag[i] - 1; 1283 sum = tmp[i]; 1284 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1285 tmp[i] = sum * aa[a->diag[i]]; 1286 x[*c--] += tmp[i]; 1287 } 1288 1289 PetscCall(ISRestoreIndices(isrow, &rout)); 1290 PetscCall(ISRestoreIndices(iscol, &cout)); 1291 PetscCall(VecRestoreArrayRead(bb, &b)); 1292 PetscCall(VecRestoreArray(xx, &x)); 1293 PetscCall(PetscLogFlops(2.0 * a->nz)); 1294 PetscFunctionReturn(PETSC_SUCCESS); 1295 } 1296 1297 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1298 { 1299 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1300 IS iscol = a->col, isrow = a->row; 1301 PetscInt i, n = A->rmap->n, j; 1302 PetscInt nz; 1303 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1304 PetscScalar *x, *tmp, sum; 1305 const PetscScalar *b; 1306 const MatScalar *aa = a->a, *v; 1307 1308 PetscFunctionBegin; 1309 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1310 1311 PetscCall(VecGetArrayRead(bb, &b)); 1312 PetscCall(VecGetArray(xx, &x)); 1313 tmp = a->solve_work; 1314 1315 PetscCall(ISGetIndices(isrow, &rout)); 1316 r = rout; 1317 PetscCall(ISGetIndices(iscol, &cout)); 1318 c = cout; 1319 1320 /* forward solve the lower triangular */ 1321 tmp[0] = b[r[0]]; 1322 v = aa; 1323 vi = aj; 1324 for (i = 1; i < n; i++) { 1325 nz = ai[i + 1] - ai[i]; 1326 sum = b[r[i]]; 1327 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1328 tmp[i] = sum; 1329 v += nz; 1330 vi += nz; 1331 } 1332 1333 /* backward solve the upper triangular */ 1334 v = aa + adiag[n - 1]; 1335 vi = aj + adiag[n - 1]; 1336 for (i = n - 1; i >= 0; i--) { 1337 nz = adiag[i] - adiag[i + 1] - 1; 1338 sum = tmp[i]; 1339 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1340 tmp[i] = sum * v[nz]; 1341 x[c[i]] += tmp[i]; 1342 v += nz + 1; 1343 vi += nz + 1; 1344 } 1345 1346 PetscCall(ISRestoreIndices(isrow, &rout)); 1347 PetscCall(ISRestoreIndices(iscol, &cout)); 1348 PetscCall(VecRestoreArrayRead(bb, &b)); 1349 PetscCall(VecRestoreArray(xx, &x)); 1350 PetscCall(PetscLogFlops(2.0 * a->nz)); 1351 PetscFunctionReturn(PETSC_SUCCESS); 1352 } 1353 1354 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1355 { 1356 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1357 IS iscol = a->col, isrow = a->row; 1358 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1359 PetscInt i, n = A->rmap->n, j; 1360 PetscInt nz; 1361 PetscScalar *x, *tmp, s1; 1362 const MatScalar *aa = a->a, *v; 1363 const PetscScalar *b; 1364 1365 PetscFunctionBegin; 1366 PetscCall(VecGetArrayRead(bb, &b)); 1367 PetscCall(VecGetArrayWrite(xx, &x)); 1368 tmp = a->solve_work; 1369 1370 PetscCall(ISGetIndices(isrow, &rout)); 1371 r = rout; 1372 PetscCall(ISGetIndices(iscol, &cout)); 1373 c = cout; 1374 1375 /* copy the b into temp work space according to permutation */ 1376 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1377 1378 /* forward solve the U^T */ 1379 for (i = 0; i < n; i++) { 1380 v = aa + diag[i]; 1381 vi = aj + diag[i] + 1; 1382 nz = ai[i + 1] - diag[i] - 1; 1383 s1 = tmp[i]; 1384 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1385 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1386 tmp[i] = s1; 1387 } 1388 1389 /* backward solve the L^T */ 1390 for (i = n - 1; i >= 0; i--) { 1391 v = aa + diag[i] - 1; 1392 vi = aj + diag[i] - 1; 1393 nz = diag[i] - ai[i]; 1394 s1 = tmp[i]; 1395 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1396 } 1397 1398 /* copy tmp into x according to permutation */ 1399 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1400 1401 PetscCall(ISRestoreIndices(isrow, &rout)); 1402 PetscCall(ISRestoreIndices(iscol, &cout)); 1403 PetscCall(VecRestoreArrayRead(bb, &b)); 1404 PetscCall(VecRestoreArrayWrite(xx, &x)); 1405 1406 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1411 { 1412 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1413 IS iscol = a->col, isrow = a->row; 1414 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1415 PetscInt i, n = A->rmap->n, j; 1416 PetscInt nz; 1417 PetscScalar *x, *tmp, s1; 1418 const MatScalar *aa = a->a, *v; 1419 const PetscScalar *b; 1420 1421 PetscFunctionBegin; 1422 PetscCall(VecGetArrayRead(bb, &b)); 1423 PetscCall(VecGetArrayWrite(xx, &x)); 1424 tmp = a->solve_work; 1425 1426 PetscCall(ISGetIndices(isrow, &rout)); 1427 r = rout; 1428 PetscCall(ISGetIndices(iscol, &cout)); 1429 c = cout; 1430 1431 /* copy the b into temp work space according to permutation */ 1432 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1433 1434 /* forward solve the U^T */ 1435 for (i = 0; i < n; i++) { 1436 v = aa + adiag[i + 1] + 1; 1437 vi = aj + adiag[i + 1] + 1; 1438 nz = adiag[i] - adiag[i + 1] - 1; 1439 s1 = tmp[i]; 1440 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1441 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1442 tmp[i] = s1; 1443 } 1444 1445 /* backward solve the L^T */ 1446 for (i = n - 1; i >= 0; i--) { 1447 v = aa + ai[i]; 1448 vi = aj + ai[i]; 1449 nz = ai[i + 1] - ai[i]; 1450 s1 = tmp[i]; 1451 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1452 } 1453 1454 /* copy tmp into x according to permutation */ 1455 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1456 1457 PetscCall(ISRestoreIndices(isrow, &rout)); 1458 PetscCall(ISRestoreIndices(iscol, &cout)); 1459 PetscCall(VecRestoreArrayRead(bb, &b)); 1460 PetscCall(VecRestoreArrayWrite(xx, &x)); 1461 1462 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1467 { 1468 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1469 IS iscol = a->col, isrow = a->row; 1470 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1471 PetscInt i, n = A->rmap->n, j; 1472 PetscInt nz; 1473 PetscScalar *x, *tmp, s1; 1474 const MatScalar *aa = a->a, *v; 1475 const PetscScalar *b; 1476 1477 PetscFunctionBegin; 1478 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1479 PetscCall(VecGetArrayRead(bb, &b)); 1480 PetscCall(VecGetArray(xx, &x)); 1481 tmp = a->solve_work; 1482 1483 PetscCall(ISGetIndices(isrow, &rout)); 1484 r = rout; 1485 PetscCall(ISGetIndices(iscol, &cout)); 1486 c = cout; 1487 1488 /* copy the b into temp work space according to permutation */ 1489 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1490 1491 /* forward solve the U^T */ 1492 for (i = 0; i < n; i++) { 1493 v = aa + diag[i]; 1494 vi = aj + diag[i] + 1; 1495 nz = ai[i + 1] - diag[i] - 1; 1496 s1 = tmp[i]; 1497 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1498 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1499 tmp[i] = s1; 1500 } 1501 1502 /* backward solve the L^T */ 1503 for (i = n - 1; i >= 0; i--) { 1504 v = aa + diag[i] - 1; 1505 vi = aj + diag[i] - 1; 1506 nz = diag[i] - ai[i]; 1507 s1 = tmp[i]; 1508 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1509 } 1510 1511 /* copy tmp into x according to permutation */ 1512 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1513 1514 PetscCall(ISRestoreIndices(isrow, &rout)); 1515 PetscCall(ISRestoreIndices(iscol, &cout)); 1516 PetscCall(VecRestoreArrayRead(bb, &b)); 1517 PetscCall(VecRestoreArray(xx, &x)); 1518 1519 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1520 PetscFunctionReturn(PETSC_SUCCESS); 1521 } 1522 1523 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1524 { 1525 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1526 IS iscol = a->col, isrow = a->row; 1527 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1528 PetscInt i, n = A->rmap->n, j; 1529 PetscInt nz; 1530 PetscScalar *x, *tmp, s1; 1531 const MatScalar *aa = a->a, *v; 1532 const PetscScalar *b; 1533 1534 PetscFunctionBegin; 1535 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1536 PetscCall(VecGetArrayRead(bb, &b)); 1537 PetscCall(VecGetArray(xx, &x)); 1538 tmp = a->solve_work; 1539 1540 PetscCall(ISGetIndices(isrow, &rout)); 1541 r = rout; 1542 PetscCall(ISGetIndices(iscol, &cout)); 1543 c = cout; 1544 1545 /* copy the b into temp work space according to permutation */ 1546 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1547 1548 /* forward solve the U^T */ 1549 for (i = 0; i < n; i++) { 1550 v = aa + adiag[i + 1] + 1; 1551 vi = aj + adiag[i + 1] + 1; 1552 nz = adiag[i] - adiag[i + 1] - 1; 1553 s1 = tmp[i]; 1554 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1555 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1556 tmp[i] = s1; 1557 } 1558 1559 /* backward solve the L^T */ 1560 for (i = n - 1; i >= 0; i--) { 1561 v = aa + ai[i]; 1562 vi = aj + ai[i]; 1563 nz = ai[i + 1] - ai[i]; 1564 s1 = tmp[i]; 1565 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1566 } 1567 1568 /* copy tmp into x according to permutation */ 1569 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1570 1571 PetscCall(ISRestoreIndices(isrow, &rout)); 1572 PetscCall(ISRestoreIndices(iscol, &cout)); 1573 PetscCall(VecRestoreArrayRead(bb, &b)); 1574 PetscCall(VecRestoreArray(xx, &x)); 1575 1576 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1577 PetscFunctionReturn(PETSC_SUCCESS); 1578 } 1579 1580 /* 1581 ilu() under revised new data structure. 1582 Factored arrays bj and ba are stored as 1583 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1584 1585 bi=fact->i is an array of size n+1, in which 1586 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1587 bi[n]: points to L(n-1,n-1)+1 1588 1589 bdiag=fact->diag is an array of size n+1,in which 1590 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1591 bdiag[n]: points to entry of U(n-1,0)-1 1592 1593 U(i,:) contains bdiag[i] as its last entry, i.e., 1594 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1595 */ 1596 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1597 { 1598 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1599 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1600 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1601 IS isicol; 1602 1603 PetscFunctionBegin; 1604 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1605 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1606 b = (Mat_SeqAIJ *)(fact)->data; 1607 1608 /* allocate matrix arrays for new data structure */ 1609 PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 1610 1611 b->singlemalloc = PETSC_TRUE; 1612 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1613 bdiag = b->diag; 1614 1615 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1616 1617 /* set bi and bj with new data structure */ 1618 bi = b->i; 1619 bj = b->j; 1620 1621 /* L part */ 1622 bi[0] = 0; 1623 for (i = 0; i < n; i++) { 1624 nz = adiag[i] - ai[i]; 1625 bi[i + 1] = bi[i] + nz; 1626 aj = a->j + ai[i]; 1627 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1628 } 1629 1630 /* U part */ 1631 bdiag[n] = bi[n] - 1; 1632 for (i = n - 1; i >= 0; i--) { 1633 nz = ai[i + 1] - adiag[i] - 1; 1634 aj = a->j + adiag[i] + 1; 1635 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1636 /* diag[i] */ 1637 bj[k++] = i; 1638 bdiag[i] = bdiag[i + 1] + nz + 1; 1639 } 1640 1641 fact->factortype = MAT_FACTOR_ILU; 1642 fact->info.factor_mallocs = 0; 1643 fact->info.fill_ratio_given = info->fill; 1644 fact->info.fill_ratio_needed = 1.0; 1645 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1646 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1647 1648 b = (Mat_SeqAIJ *)(fact)->data; 1649 b->row = isrow; 1650 b->col = iscol; 1651 b->icol = isicol; 1652 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1653 PetscCall(PetscObjectReference((PetscObject)isrow)); 1654 PetscCall(PetscObjectReference((PetscObject)iscol)); 1655 PetscFunctionReturn(PETSC_SUCCESS); 1656 } 1657 1658 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1659 { 1660 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1661 IS isicol; 1662 const PetscInt *r, *ic; 1663 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1664 PetscInt *bi, *cols, nnz, *cols_lvl; 1665 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1666 PetscInt i, levels, diagonal_fill; 1667 PetscBool col_identity, row_identity, missing; 1668 PetscReal f; 1669 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1670 PetscBT lnkbt; 1671 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1672 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1673 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1674 1675 PetscFunctionBegin; 1676 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1677 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1678 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1679 1680 levels = (PetscInt)info->levels; 1681 PetscCall(ISIdentity(isrow, &row_identity)); 1682 PetscCall(ISIdentity(iscol, &col_identity)); 1683 if (!levels && row_identity && col_identity) { 1684 /* special case: ilu(0) with natural ordering */ 1685 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1686 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1687 PetscFunctionReturn(PETSC_SUCCESS); 1688 } 1689 1690 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1691 PetscCall(ISGetIndices(isrow, &r)); 1692 PetscCall(ISGetIndices(isicol, &ic)); 1693 1694 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1695 PetscCall(PetscMalloc1(n + 1, &bi)); 1696 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1697 bi[0] = bdiag[0] = 0; 1698 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1699 1700 /* create a linked list for storing column indices of the active row */ 1701 nlnk = n + 1; 1702 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1703 1704 /* initial FreeSpace size is f*(ai[n]+1) */ 1705 f = info->fill; 1706 diagonal_fill = (PetscInt)info->diagonal_fill; 1707 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1708 current_space = free_space; 1709 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1710 current_space_lvl = free_space_lvl; 1711 for (i = 0; i < n; i++) { 1712 nzi = 0; 1713 /* copy current row into linked list */ 1714 nnz = ai[r[i] + 1] - ai[r[i]]; 1715 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1716 cols = aj + ai[r[i]]; 1717 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1718 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1719 nzi += nlnk; 1720 1721 /* make sure diagonal entry is included */ 1722 if (diagonal_fill && lnk[i] == -1) { 1723 fm = n; 1724 while (lnk[fm] < i) fm = lnk[fm]; 1725 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1726 lnk[fm] = i; 1727 lnk_lvl[i] = 0; 1728 nzi++; 1729 dcount++; 1730 } 1731 1732 /* add pivot rows into the active row */ 1733 nzbd = 0; 1734 prow = lnk[n]; 1735 while (prow < i) { 1736 nnz = bdiag[prow]; 1737 cols = bj_ptr[prow] + nnz + 1; 1738 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1739 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1740 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1741 nzi += nlnk; 1742 prow = lnk[prow]; 1743 nzbd++; 1744 } 1745 bdiag[i] = nzbd; 1746 bi[i + 1] = bi[i] + nzi; 1747 /* if free space is not available, make more free space */ 1748 if (current_space->local_remaining < nzi) { 1749 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1750 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1751 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1752 reallocs++; 1753 } 1754 1755 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1756 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1757 bj_ptr[i] = current_space->array; 1758 bjlvl_ptr[i] = current_space_lvl->array; 1759 1760 /* make sure the active row i has diagonal entry */ 1761 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1762 1763 current_space->array += nzi; 1764 current_space->local_used += nzi; 1765 current_space->local_remaining -= nzi; 1766 current_space_lvl->array += nzi; 1767 current_space_lvl->local_used += nzi; 1768 current_space_lvl->local_remaining -= nzi; 1769 } 1770 1771 PetscCall(ISRestoreIndices(isrow, &r)); 1772 PetscCall(ISRestoreIndices(isicol, &ic)); 1773 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1774 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1775 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1776 1777 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1778 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1779 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1780 1781 #if defined(PETSC_USE_INFO) 1782 { 1783 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1784 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1785 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1786 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1787 PetscCall(PetscInfo(A, "for best performance.\n")); 1788 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1789 } 1790 #endif 1791 /* put together the new matrix */ 1792 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1793 b = (Mat_SeqAIJ *)(fact)->data; 1794 1795 b->free_a = PETSC_TRUE; 1796 b->free_ij = PETSC_TRUE; 1797 b->singlemalloc = PETSC_FALSE; 1798 1799 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 1800 1801 b->j = bj; 1802 b->i = bi; 1803 b->diag = bdiag; 1804 b->ilen = NULL; 1805 b->imax = NULL; 1806 b->row = isrow; 1807 b->col = iscol; 1808 PetscCall(PetscObjectReference((PetscObject)isrow)); 1809 PetscCall(PetscObjectReference((PetscObject)iscol)); 1810 b->icol = isicol; 1811 1812 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1813 /* In b structure: Free imax, ilen, old a, old j. 1814 Allocate bdiag, solve_work, new a, new j */ 1815 b->maxnz = b->nz = bdiag[0] + 1; 1816 1817 (fact)->info.factor_mallocs = reallocs; 1818 (fact)->info.fill_ratio_given = f; 1819 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1820 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1821 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1822 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1823 PetscFunctionReturn(PETSC_SUCCESS); 1824 } 1825 1826 #if 0 1827 // unused 1828 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1829 { 1830 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1831 IS isicol; 1832 const PetscInt *r, *ic; 1833 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1834 PetscInt *bi, *cols, nnz, *cols_lvl; 1835 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1836 PetscInt i, levels, diagonal_fill; 1837 PetscBool col_identity, row_identity; 1838 PetscReal f; 1839 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1840 PetscBT lnkbt; 1841 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1842 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1843 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1844 PetscBool missing; 1845 1846 PetscFunctionBegin; 1847 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1848 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1849 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1850 1851 f = info->fill; 1852 levels = (PetscInt)info->levels; 1853 diagonal_fill = (PetscInt)info->diagonal_fill; 1854 1855 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1856 1857 PetscCall(ISIdentity(isrow, &row_identity)); 1858 PetscCall(ISIdentity(iscol, &col_identity)); 1859 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1860 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1861 1862 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1863 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1864 fact->factortype = MAT_FACTOR_ILU; 1865 (fact)->info.factor_mallocs = 0; 1866 (fact)->info.fill_ratio_given = info->fill; 1867 (fact)->info.fill_ratio_needed = 1.0; 1868 1869 b = (Mat_SeqAIJ *)(fact)->data; 1870 b->row = isrow; 1871 b->col = iscol; 1872 b->icol = isicol; 1873 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1874 PetscCall(PetscObjectReference((PetscObject)isrow)); 1875 PetscCall(PetscObjectReference((PetscObject)iscol)); 1876 PetscFunctionReturn(PETSC_SUCCESS); 1877 } 1878 1879 PetscCall(ISGetIndices(isrow, &r)); 1880 PetscCall(ISGetIndices(isicol, &ic)); 1881 1882 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1883 PetscCall(PetscMalloc1(n + 1, &bi)); 1884 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1885 bi[0] = bdiag[0] = 0; 1886 1887 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1888 1889 /* create a linked list for storing column indices of the active row */ 1890 nlnk = n + 1; 1891 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1892 1893 /* initial FreeSpace size is f*(ai[n]+1) */ 1894 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1895 current_space = free_space; 1896 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1897 current_space_lvl = free_space_lvl; 1898 1899 for (i = 0; i < n; i++) { 1900 nzi = 0; 1901 /* copy current row into linked list */ 1902 nnz = ai[r[i] + 1] - ai[r[i]]; 1903 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1904 cols = aj + ai[r[i]]; 1905 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1906 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1907 nzi += nlnk; 1908 1909 /* make sure diagonal entry is included */ 1910 if (diagonal_fill && lnk[i] == -1) { 1911 fm = n; 1912 while (lnk[fm] < i) fm = lnk[fm]; 1913 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1914 lnk[fm] = i; 1915 lnk_lvl[i] = 0; 1916 nzi++; 1917 dcount++; 1918 } 1919 1920 /* add pivot rows into the active row */ 1921 nzbd = 0; 1922 prow = lnk[n]; 1923 while (prow < i) { 1924 nnz = bdiag[prow]; 1925 cols = bj_ptr[prow] + nnz + 1; 1926 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1927 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1928 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1929 nzi += nlnk; 1930 prow = lnk[prow]; 1931 nzbd++; 1932 } 1933 bdiag[i] = nzbd; 1934 bi[i + 1] = bi[i] + nzi; 1935 1936 /* if free space is not available, make more free space */ 1937 if (current_space->local_remaining < nzi) { 1938 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1939 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1940 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1941 reallocs++; 1942 } 1943 1944 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1945 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1946 bj_ptr[i] = current_space->array; 1947 bjlvl_ptr[i] = current_space_lvl->array; 1948 1949 /* make sure the active row i has diagonal entry */ 1950 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1951 1952 current_space->array += nzi; 1953 current_space->local_used += nzi; 1954 current_space->local_remaining -= nzi; 1955 current_space_lvl->array += nzi; 1956 current_space_lvl->local_used += nzi; 1957 current_space_lvl->local_remaining -= nzi; 1958 } 1959 1960 PetscCall(ISRestoreIndices(isrow, &r)); 1961 PetscCall(ISRestoreIndices(isicol, &ic)); 1962 1963 /* destroy list of free space and other temporary arrays */ 1964 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1965 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 1966 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1967 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1968 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1969 1970 #if defined(PETSC_USE_INFO) 1971 { 1972 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 1973 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1974 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1975 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1976 PetscCall(PetscInfo(A, "for best performance.\n")); 1977 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1978 } 1979 #endif 1980 1981 /* put together the new matrix */ 1982 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1983 b = (Mat_SeqAIJ *)(fact)->data; 1984 1985 b->free_a = PETSC_TRUE; 1986 b->free_ij = PETSC_TRUE; 1987 b->singlemalloc = PETSC_FALSE; 1988 1989 PetscCall(PetscMalloc1(bi[n], &b->a)); 1990 b->j = bj; 1991 b->i = bi; 1992 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 1993 b->diag = bdiag; 1994 b->ilen = NULL; 1995 b->imax = NULL; 1996 b->row = isrow; 1997 b->col = iscol; 1998 PetscCall(PetscObjectReference((PetscObject)isrow)); 1999 PetscCall(PetscObjectReference((PetscObject)iscol)); 2000 b->icol = isicol; 2001 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2002 /* In b structure: Free imax, ilen, old a, old j. 2003 Allocate bdiag, solve_work, new a, new j */ 2004 b->maxnz = b->nz = bi[n]; 2005 2006 (fact)->info.factor_mallocs = reallocs; 2007 (fact)->info.fill_ratio_given = f; 2008 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2009 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2010 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2011 PetscFunctionReturn(PETSC_SUCCESS); 2012 } 2013 #endif 2014 2015 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2016 { 2017 Mat C = B; 2018 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2019 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2020 IS ip = b->row, iip = b->icol; 2021 const PetscInt *rip, *riip; 2022 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2023 PetscInt *ai = a->i, *aj = a->j; 2024 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2025 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2026 PetscBool perm_identity; 2027 FactorShiftCtx sctx; 2028 PetscReal rs; 2029 MatScalar d, *v; 2030 2031 PetscFunctionBegin; 2032 /* MatPivotSetUp(): initialize shift context sctx */ 2033 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2034 2035 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2036 sctx.shift_top = info->zeropivot; 2037 for (i = 0; i < mbs; i++) { 2038 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2039 d = (aa)[a->diag[i]]; 2040 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2041 v = aa + ai[i]; 2042 nz = ai[i + 1] - ai[i]; 2043 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2044 if (rs > sctx.shift_top) sctx.shift_top = rs; 2045 } 2046 sctx.shift_top *= 1.1; 2047 sctx.nshift_max = 5; 2048 sctx.shift_lo = 0.; 2049 sctx.shift_hi = 1.; 2050 } 2051 2052 PetscCall(ISGetIndices(ip, &rip)); 2053 PetscCall(ISGetIndices(iip, &riip)); 2054 2055 /* allocate working arrays 2056 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2057 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2058 */ 2059 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2060 2061 do { 2062 sctx.newshift = PETSC_FALSE; 2063 2064 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2065 if (mbs) il[0] = 0; 2066 2067 for (k = 0; k < mbs; k++) { 2068 /* zero rtmp */ 2069 nz = bi[k + 1] - bi[k]; 2070 bjtmp = bj + bi[k]; 2071 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2072 2073 /* load in initial unfactored row */ 2074 bval = ba + bi[k]; 2075 jmin = ai[rip[k]]; 2076 jmax = ai[rip[k] + 1]; 2077 for (j = jmin; j < jmax; j++) { 2078 col = riip[aj[j]]; 2079 if (col >= k) { /* only take upper triangular entry */ 2080 rtmp[col] = aa[j]; 2081 *bval++ = 0.0; /* for in-place factorization */ 2082 } 2083 } 2084 /* shift the diagonal of the matrix: ZeropivotApply() */ 2085 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2086 2087 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2088 dk = rtmp[k]; 2089 i = c2r[k]; /* first row to be added to k_th row */ 2090 2091 while (i < k) { 2092 nexti = c2r[i]; /* next row to be added to k_th row */ 2093 2094 /* compute multiplier, update diag(k) and U(i,k) */ 2095 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2096 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2097 dk += uikdi * ba[ili]; /* update diag[k] */ 2098 ba[ili] = uikdi; /* -U(i,k) */ 2099 2100 /* add multiple of row i to k-th row */ 2101 jmin = ili + 1; 2102 jmax = bi[i + 1]; 2103 if (jmin < jmax) { 2104 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2105 /* update il and c2r for row i */ 2106 il[i] = jmin; 2107 j = bj[jmin]; 2108 c2r[i] = c2r[j]; 2109 c2r[j] = i; 2110 } 2111 i = nexti; 2112 } 2113 2114 /* copy data into U(k,:) */ 2115 rs = 0.0; 2116 jmin = bi[k]; 2117 jmax = bi[k + 1] - 1; 2118 if (jmin < jmax) { 2119 for (j = jmin; j < jmax; j++) { 2120 col = bj[j]; 2121 ba[j] = rtmp[col]; 2122 rs += PetscAbsScalar(ba[j]); 2123 } 2124 /* add the k-th row into il and c2r */ 2125 il[k] = jmin; 2126 i = bj[jmin]; 2127 c2r[k] = c2r[i]; 2128 c2r[i] = k; 2129 } 2130 2131 /* MatPivotCheck() */ 2132 sctx.rs = rs; 2133 sctx.pv = dk; 2134 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2135 if (sctx.newshift) break; 2136 dk = sctx.pv; 2137 2138 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2139 } 2140 } while (sctx.newshift); 2141 2142 PetscCall(PetscFree3(rtmp, il, c2r)); 2143 PetscCall(ISRestoreIndices(ip, &rip)); 2144 PetscCall(ISRestoreIndices(iip, &riip)); 2145 2146 PetscCall(ISIdentity(ip, &perm_identity)); 2147 if (perm_identity) { 2148 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2149 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2150 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2151 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2152 } else { 2153 B->ops->solve = MatSolve_SeqSBAIJ_1; 2154 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2155 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2156 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2157 } 2158 2159 C->assembled = PETSC_TRUE; 2160 C->preallocated = PETSC_TRUE; 2161 2162 PetscCall(PetscLogFlops(C->rmap->n)); 2163 2164 /* MatPivotView() */ 2165 if (sctx.nshift) { 2166 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2167 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2168 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2169 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2170 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2171 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2172 } 2173 } 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2178 { 2179 Mat C = B; 2180 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2181 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2182 IS ip = b->row, iip = b->icol; 2183 const PetscInt *rip, *riip; 2184 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2185 PetscInt *ai = a->i, *aj = a->j; 2186 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2187 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2188 PetscBool perm_identity; 2189 FactorShiftCtx sctx; 2190 PetscReal rs; 2191 MatScalar d, *v; 2192 2193 PetscFunctionBegin; 2194 /* MatPivotSetUp(): initialize shift context sctx */ 2195 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2196 2197 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2198 sctx.shift_top = info->zeropivot; 2199 for (i = 0; i < mbs; i++) { 2200 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2201 d = (aa)[a->diag[i]]; 2202 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2203 v = aa + ai[i]; 2204 nz = ai[i + 1] - ai[i]; 2205 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2206 if (rs > sctx.shift_top) sctx.shift_top = rs; 2207 } 2208 sctx.shift_top *= 1.1; 2209 sctx.nshift_max = 5; 2210 sctx.shift_lo = 0.; 2211 sctx.shift_hi = 1.; 2212 } 2213 2214 PetscCall(ISGetIndices(ip, &rip)); 2215 PetscCall(ISGetIndices(iip, &riip)); 2216 2217 /* initialization */ 2218 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2219 2220 do { 2221 sctx.newshift = PETSC_FALSE; 2222 2223 for (i = 0; i < mbs; i++) jl[i] = mbs; 2224 il[0] = 0; 2225 2226 for (k = 0; k < mbs; k++) { 2227 /* zero rtmp */ 2228 nz = bi[k + 1] - bi[k]; 2229 bjtmp = bj + bi[k]; 2230 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2231 2232 bval = ba + bi[k]; 2233 /* initialize k-th row by the perm[k]-th row of A */ 2234 jmin = ai[rip[k]]; 2235 jmax = ai[rip[k] + 1]; 2236 for (j = jmin; j < jmax; j++) { 2237 col = riip[aj[j]]; 2238 if (col >= k) { /* only take upper triangular entry */ 2239 rtmp[col] = aa[j]; 2240 *bval++ = 0.0; /* for in-place factorization */ 2241 } 2242 } 2243 /* shift the diagonal of the matrix */ 2244 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2245 2246 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2247 dk = rtmp[k]; 2248 i = jl[k]; /* first row to be added to k_th row */ 2249 2250 while (i < k) { 2251 nexti = jl[i]; /* next row to be added to k_th row */ 2252 2253 /* compute multiplier, update diag(k) and U(i,k) */ 2254 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2255 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2256 dk += uikdi * ba[ili]; 2257 ba[ili] = uikdi; /* -U(i,k) */ 2258 2259 /* add multiple of row i to k-th row */ 2260 jmin = ili + 1; 2261 jmax = bi[i + 1]; 2262 if (jmin < jmax) { 2263 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2264 /* update il and jl for row i */ 2265 il[i] = jmin; 2266 j = bj[jmin]; 2267 jl[i] = jl[j]; 2268 jl[j] = i; 2269 } 2270 i = nexti; 2271 } 2272 2273 /* shift the diagonals when zero pivot is detected */ 2274 /* compute rs=sum of abs(off-diagonal) */ 2275 rs = 0.0; 2276 jmin = bi[k] + 1; 2277 nz = bi[k + 1] - jmin; 2278 bcol = bj + jmin; 2279 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2280 2281 sctx.rs = rs; 2282 sctx.pv = dk; 2283 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2284 if (sctx.newshift) break; 2285 dk = sctx.pv; 2286 2287 /* copy data into U(k,:) */ 2288 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2289 jmin = bi[k] + 1; 2290 jmax = bi[k + 1]; 2291 if (jmin < jmax) { 2292 for (j = jmin; j < jmax; j++) { 2293 col = bj[j]; 2294 ba[j] = rtmp[col]; 2295 } 2296 /* add the k-th row into il and jl */ 2297 il[k] = jmin; 2298 i = bj[jmin]; 2299 jl[k] = jl[i]; 2300 jl[i] = k; 2301 } 2302 } 2303 } while (sctx.newshift); 2304 2305 PetscCall(PetscFree3(rtmp, il, jl)); 2306 PetscCall(ISRestoreIndices(ip, &rip)); 2307 PetscCall(ISRestoreIndices(iip, &riip)); 2308 2309 PetscCall(ISIdentity(ip, &perm_identity)); 2310 if (perm_identity) { 2311 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2312 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2313 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2314 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2315 } else { 2316 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2317 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2318 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2319 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2320 } 2321 2322 C->assembled = PETSC_TRUE; 2323 C->preallocated = PETSC_TRUE; 2324 2325 PetscCall(PetscLogFlops(C->rmap->n)); 2326 if (sctx.nshift) { 2327 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2328 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2329 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2330 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2331 } 2332 } 2333 PetscFunctionReturn(PETSC_SUCCESS); 2334 } 2335 2336 /* 2337 icc() under revised new data structure. 2338 Factored arrays bj and ba are stored as 2339 U(0,:),...,U(i,:),U(n-1,:) 2340 2341 ui=fact->i is an array of size n+1, in which 2342 ui+ 2343 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2344 ui[n]: points to U(n-1,n-1)+1 2345 2346 udiag=fact->diag is an array of size n,in which 2347 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2348 2349 U(i,:) contains udiag[i] as its last entry, i.e., 2350 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2351 */ 2352 2353 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2354 { 2355 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2356 Mat_SeqSBAIJ *b; 2357 PetscBool perm_identity, missing; 2358 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2359 const PetscInt *rip, *riip; 2360 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2361 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2362 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2363 PetscReal fill = info->fill, levels = info->levels; 2364 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2365 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2366 PetscBT lnkbt; 2367 IS iperm; 2368 2369 PetscFunctionBegin; 2370 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2371 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2372 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2373 PetscCall(ISIdentity(perm, &perm_identity)); 2374 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2375 2376 PetscCall(PetscMalloc1(am + 1, &ui)); 2377 PetscCall(PetscMalloc1(am + 1, &udiag)); 2378 ui[0] = 0; 2379 2380 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2381 if (!levels && perm_identity) { 2382 for (i = 0; i < am; i++) { 2383 ncols = ai[i + 1] - a->diag[i]; 2384 ui[i + 1] = ui[i] + ncols; 2385 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2386 } 2387 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2388 cols = uj; 2389 for (i = 0; i < am; i++) { 2390 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2391 ncols = ai[i + 1] - a->diag[i] - 1; 2392 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2393 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2394 } 2395 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2396 PetscCall(ISGetIndices(iperm, &riip)); 2397 PetscCall(ISGetIndices(perm, &rip)); 2398 2399 /* initialization */ 2400 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2401 2402 /* jl: linked list for storing indices of the pivot rows 2403 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2404 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2405 for (i = 0; i < am; i++) { 2406 jl[i] = am; 2407 il[i] = 0; 2408 } 2409 2410 /* create and initialize a linked list for storing column indices of the active row k */ 2411 nlnk = am + 1; 2412 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2413 2414 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2415 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2416 current_space = free_space; 2417 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2418 current_space_lvl = free_space_lvl; 2419 2420 for (k = 0; k < am; k++) { /* for each active row k */ 2421 /* initialize lnk by the column indices of row rip[k] of A */ 2422 nzk = 0; 2423 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2424 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2425 ncols_upper = 0; 2426 for (j = 0; j < ncols; j++) { 2427 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2428 if (riip[i] >= k) { /* only take upper triangular entry */ 2429 ajtmp[ncols_upper] = i; 2430 ncols_upper++; 2431 } 2432 } 2433 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2434 nzk += nlnk; 2435 2436 /* update lnk by computing fill-in for each pivot row to be merged in */ 2437 prow = jl[k]; /* 1st pivot row */ 2438 2439 while (prow < k) { 2440 nextprow = jl[prow]; 2441 2442 /* merge prow into k-th row */ 2443 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2444 jmax = ui[prow + 1]; 2445 ncols = jmax - jmin; 2446 i = jmin - ui[prow]; 2447 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2448 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2449 j = *(uj - 1); 2450 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2451 nzk += nlnk; 2452 2453 /* update il and jl for prow */ 2454 if (jmin < jmax) { 2455 il[prow] = jmin; 2456 j = *cols; 2457 jl[prow] = jl[j]; 2458 jl[j] = prow; 2459 } 2460 prow = nextprow; 2461 } 2462 2463 /* if free space is not available, make more free space */ 2464 if (current_space->local_remaining < nzk) { 2465 i = am - k + 1; /* num of unfactored rows */ 2466 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2467 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2468 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2469 reallocs++; 2470 } 2471 2472 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2473 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2474 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2475 2476 /* add the k-th row into il and jl */ 2477 if (nzk > 1) { 2478 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2479 jl[k] = jl[i]; 2480 jl[i] = k; 2481 il[k] = ui[k] + 1; 2482 } 2483 uj_ptr[k] = current_space->array; 2484 uj_lvl_ptr[k] = current_space_lvl->array; 2485 2486 current_space->array += nzk; 2487 current_space->local_used += nzk; 2488 current_space->local_remaining -= nzk; 2489 2490 current_space_lvl->array += nzk; 2491 current_space_lvl->local_used += nzk; 2492 current_space_lvl->local_remaining -= nzk; 2493 2494 ui[k + 1] = ui[k] + nzk; 2495 } 2496 2497 PetscCall(ISRestoreIndices(perm, &rip)); 2498 PetscCall(ISRestoreIndices(iperm, &riip)); 2499 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2500 PetscCall(PetscFree(ajtmp)); 2501 2502 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2503 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2504 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2505 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2506 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2507 2508 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2509 2510 /* put together the new matrix in MATSEQSBAIJ format */ 2511 b = (Mat_SeqSBAIJ *)(fact)->data; 2512 b->singlemalloc = PETSC_FALSE; 2513 2514 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2515 2516 b->j = uj; 2517 b->i = ui; 2518 b->diag = udiag; 2519 b->free_diag = PETSC_TRUE; 2520 b->ilen = NULL; 2521 b->imax = NULL; 2522 b->row = perm; 2523 b->col = perm; 2524 PetscCall(PetscObjectReference((PetscObject)perm)); 2525 PetscCall(PetscObjectReference((PetscObject)perm)); 2526 b->icol = iperm; 2527 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2528 2529 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2530 2531 b->maxnz = b->nz = ui[am]; 2532 b->free_a = PETSC_TRUE; 2533 b->free_ij = PETSC_TRUE; 2534 2535 fact->info.factor_mallocs = reallocs; 2536 fact->info.fill_ratio_given = fill; 2537 if (ai[am] != 0) { 2538 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2539 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2540 } else { 2541 fact->info.fill_ratio_needed = 0.0; 2542 } 2543 #if defined(PETSC_USE_INFO) 2544 if (ai[am] != 0) { 2545 PetscReal af = fact->info.fill_ratio_needed; 2546 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2547 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2548 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2549 } else { 2550 PetscCall(PetscInfo(A, "Empty matrix\n")); 2551 } 2552 #endif 2553 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2554 PetscFunctionReturn(PETSC_SUCCESS); 2555 } 2556 2557 #if 0 2558 // unused 2559 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2560 { 2561 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2562 Mat_SeqSBAIJ *b; 2563 PetscBool perm_identity, missing; 2564 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2565 const PetscInt *rip, *riip; 2566 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2567 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2568 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2569 PetscReal fill = info->fill, levels = info->levels; 2570 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2571 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2572 PetscBT lnkbt; 2573 IS iperm; 2574 2575 PetscFunctionBegin; 2576 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2577 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2578 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2579 PetscCall(ISIdentity(perm, &perm_identity)); 2580 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2581 2582 PetscCall(PetscMalloc1(am + 1, &ui)); 2583 PetscCall(PetscMalloc1(am + 1, &udiag)); 2584 ui[0] = 0; 2585 2586 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2587 if (!levels && perm_identity) { 2588 for (i = 0; i < am; i++) { 2589 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2590 udiag[i] = ui[i]; 2591 } 2592 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2593 cols = uj; 2594 for (i = 0; i < am; i++) { 2595 aj = a->j + a->diag[i]; 2596 ncols = ui[i + 1] - ui[i]; 2597 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2598 } 2599 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2600 PetscCall(ISGetIndices(iperm, &riip)); 2601 PetscCall(ISGetIndices(perm, &rip)); 2602 2603 /* initialization */ 2604 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2605 2606 /* jl: linked list for storing indices of the pivot rows 2607 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2608 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2609 for (i = 0; i < am; i++) { 2610 jl[i] = am; 2611 il[i] = 0; 2612 } 2613 2614 /* create and initialize a linked list for storing column indices of the active row k */ 2615 nlnk = am + 1; 2616 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2617 2618 /* initial FreeSpace size is fill*(ai[am]+1) */ 2619 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2620 current_space = free_space; 2621 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2622 current_space_lvl = free_space_lvl; 2623 2624 for (k = 0; k < am; k++) { /* for each active row k */ 2625 /* initialize lnk by the column indices of row rip[k] of A */ 2626 nzk = 0; 2627 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2628 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2629 ncols_upper = 0; 2630 for (j = 0; j < ncols; j++) { 2631 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2632 if (riip[i] >= k) { /* only take upper triangular entry */ 2633 ajtmp[ncols_upper] = i; 2634 ncols_upper++; 2635 } 2636 } 2637 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2638 nzk += nlnk; 2639 2640 /* update lnk by computing fill-in for each pivot row to be merged in */ 2641 prow = jl[k]; /* 1st pivot row */ 2642 2643 while (prow < k) { 2644 nextprow = jl[prow]; 2645 2646 /* merge prow into k-th row */ 2647 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2648 jmax = ui[prow + 1]; 2649 ncols = jmax - jmin; 2650 i = jmin - ui[prow]; 2651 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2652 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2653 j = *(uj - 1); 2654 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2655 nzk += nlnk; 2656 2657 /* update il and jl for prow */ 2658 if (jmin < jmax) { 2659 il[prow] = jmin; 2660 j = *cols; 2661 jl[prow] = jl[j]; 2662 jl[j] = prow; 2663 } 2664 prow = nextprow; 2665 } 2666 2667 /* if free space is not available, make more free space */ 2668 if (current_space->local_remaining < nzk) { 2669 i = am - k + 1; /* num of unfactored rows */ 2670 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2671 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2672 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2673 reallocs++; 2674 } 2675 2676 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2677 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2678 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2679 2680 /* add the k-th row into il and jl */ 2681 if (nzk > 1) { 2682 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2683 jl[k] = jl[i]; 2684 jl[i] = k; 2685 il[k] = ui[k] + 1; 2686 } 2687 uj_ptr[k] = current_space->array; 2688 uj_lvl_ptr[k] = current_space_lvl->array; 2689 2690 current_space->array += nzk; 2691 current_space->local_used += nzk; 2692 current_space->local_remaining -= nzk; 2693 2694 current_space_lvl->array += nzk; 2695 current_space_lvl->local_used += nzk; 2696 current_space_lvl->local_remaining -= nzk; 2697 2698 ui[k + 1] = ui[k] + nzk; 2699 } 2700 2701 #if defined(PETSC_USE_INFO) 2702 if (ai[am] != 0) { 2703 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2704 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2705 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2706 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2707 } else { 2708 PetscCall(PetscInfo(A, "Empty matrix\n")); 2709 } 2710 #endif 2711 2712 PetscCall(ISRestoreIndices(perm, &rip)); 2713 PetscCall(ISRestoreIndices(iperm, &riip)); 2714 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2715 PetscCall(PetscFree(ajtmp)); 2716 2717 /* destroy list of free space and other temporary array(s) */ 2718 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2719 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2720 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2721 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2722 2723 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2724 2725 /* put together the new matrix in MATSEQSBAIJ format */ 2726 2727 b = (Mat_SeqSBAIJ *)fact->data; 2728 b->singlemalloc = PETSC_FALSE; 2729 2730 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2731 2732 b->j = uj; 2733 b->i = ui; 2734 b->diag = udiag; 2735 b->free_diag = PETSC_TRUE; 2736 b->ilen = NULL; 2737 b->imax = NULL; 2738 b->row = perm; 2739 b->col = perm; 2740 2741 PetscCall(PetscObjectReference((PetscObject)perm)); 2742 PetscCall(PetscObjectReference((PetscObject)perm)); 2743 2744 b->icol = iperm; 2745 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2746 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2747 b->maxnz = b->nz = ui[am]; 2748 b->free_a = PETSC_TRUE; 2749 b->free_ij = PETSC_TRUE; 2750 2751 fact->info.factor_mallocs = reallocs; 2752 fact->info.fill_ratio_given = fill; 2753 if (ai[am] != 0) { 2754 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2755 } else { 2756 fact->info.fill_ratio_needed = 0.0; 2757 } 2758 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2759 PetscFunctionReturn(PETSC_SUCCESS); 2760 } 2761 #endif 2762 2763 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2764 { 2765 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2766 Mat_SeqSBAIJ *b; 2767 PetscBool perm_identity, missing; 2768 PetscReal fill = info->fill; 2769 const PetscInt *rip, *riip; 2770 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2771 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2772 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2773 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2774 PetscBT lnkbt; 2775 IS iperm; 2776 2777 PetscFunctionBegin; 2778 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2779 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2780 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2781 2782 /* check whether perm is the identity mapping */ 2783 PetscCall(ISIdentity(perm, &perm_identity)); 2784 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2785 PetscCall(ISGetIndices(iperm, &riip)); 2786 PetscCall(ISGetIndices(perm, &rip)); 2787 2788 /* initialization */ 2789 PetscCall(PetscMalloc1(am + 1, &ui)); 2790 PetscCall(PetscMalloc1(am + 1, &udiag)); 2791 ui[0] = 0; 2792 2793 /* jl: linked list for storing indices of the pivot rows 2794 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2795 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2796 for (i = 0; i < am; i++) { 2797 jl[i] = am; 2798 il[i] = 0; 2799 } 2800 2801 /* create and initialize a linked list for storing column indices of the active row k */ 2802 nlnk = am + 1; 2803 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2804 2805 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2806 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2807 current_space = free_space; 2808 2809 for (k = 0; k < am; k++) { /* for each active row k */ 2810 /* initialize lnk by the column indices of row rip[k] of A */ 2811 nzk = 0; 2812 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2813 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2814 ncols_upper = 0; 2815 for (j = 0; j < ncols; j++) { 2816 i = riip[*(aj + ai[rip[k]] + j)]; 2817 if (i >= k) { /* only take upper triangular entry */ 2818 cols[ncols_upper] = i; 2819 ncols_upper++; 2820 } 2821 } 2822 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2823 nzk += nlnk; 2824 2825 /* update lnk by computing fill-in for each pivot row to be merged in */ 2826 prow = jl[k]; /* 1st pivot row */ 2827 2828 while (prow < k) { 2829 nextprow = jl[prow]; 2830 /* merge prow into k-th row */ 2831 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2832 jmax = ui[prow + 1]; 2833 ncols = jmax - jmin; 2834 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2835 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2836 nzk += nlnk; 2837 2838 /* update il and jl for prow */ 2839 if (jmin < jmax) { 2840 il[prow] = jmin; 2841 j = *uj_ptr; 2842 jl[prow] = jl[j]; 2843 jl[j] = prow; 2844 } 2845 prow = nextprow; 2846 } 2847 2848 /* if free space is not available, make more free space */ 2849 if (current_space->local_remaining < nzk) { 2850 i = am - k + 1; /* num of unfactored rows */ 2851 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2852 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2853 reallocs++; 2854 } 2855 2856 /* copy data into free space, then initialize lnk */ 2857 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2858 2859 /* add the k-th row into il and jl */ 2860 if (nzk > 1) { 2861 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2862 jl[k] = jl[i]; 2863 jl[i] = k; 2864 il[k] = ui[k] + 1; 2865 } 2866 ui_ptr[k] = current_space->array; 2867 2868 current_space->array += nzk; 2869 current_space->local_used += nzk; 2870 current_space->local_remaining -= nzk; 2871 2872 ui[k + 1] = ui[k] + nzk; 2873 } 2874 2875 PetscCall(ISRestoreIndices(perm, &rip)); 2876 PetscCall(ISRestoreIndices(iperm, &riip)); 2877 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2878 2879 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2880 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2881 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2882 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2883 2884 /* put together the new matrix in MATSEQSBAIJ format */ 2885 2886 b = (Mat_SeqSBAIJ *)fact->data; 2887 b->singlemalloc = PETSC_FALSE; 2888 b->free_a = PETSC_TRUE; 2889 b->free_ij = PETSC_TRUE; 2890 2891 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2892 2893 b->j = uj; 2894 b->i = ui; 2895 b->diag = udiag; 2896 b->free_diag = PETSC_TRUE; 2897 b->ilen = NULL; 2898 b->imax = NULL; 2899 b->row = perm; 2900 b->col = perm; 2901 2902 PetscCall(PetscObjectReference((PetscObject)perm)); 2903 PetscCall(PetscObjectReference((PetscObject)perm)); 2904 2905 b->icol = iperm; 2906 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2907 2908 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2909 2910 b->maxnz = b->nz = ui[am]; 2911 2912 fact->info.factor_mallocs = reallocs; 2913 fact->info.fill_ratio_given = fill; 2914 if (ai[am] != 0) { 2915 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2916 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2917 } else { 2918 fact->info.fill_ratio_needed = 0.0; 2919 } 2920 #if defined(PETSC_USE_INFO) 2921 if (ai[am] != 0) { 2922 PetscReal af = fact->info.fill_ratio_needed; 2923 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2924 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2925 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2926 } else { 2927 PetscCall(PetscInfo(A, "Empty matrix\n")); 2928 } 2929 #endif 2930 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2931 PetscFunctionReturn(PETSC_SUCCESS); 2932 } 2933 2934 #if 0 2935 // unused 2936 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2937 { 2938 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2939 Mat_SeqSBAIJ *b; 2940 PetscBool perm_identity, missing; 2941 PetscReal fill = info->fill; 2942 const PetscInt *rip, *riip; 2943 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2944 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2945 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2946 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2947 PetscBT lnkbt; 2948 IS iperm; 2949 2950 PetscFunctionBegin; 2951 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2952 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2953 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2954 2955 /* check whether perm is the identity mapping */ 2956 PetscCall(ISIdentity(perm, &perm_identity)); 2957 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2958 PetscCall(ISGetIndices(iperm, &riip)); 2959 PetscCall(ISGetIndices(perm, &rip)); 2960 2961 /* initialization */ 2962 PetscCall(PetscMalloc1(am + 1, &ui)); 2963 ui[0] = 0; 2964 2965 /* jl: linked list for storing indices of the pivot rows 2966 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2967 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2968 for (i = 0; i < am; i++) { 2969 jl[i] = am; 2970 il[i] = 0; 2971 } 2972 2973 /* create and initialize a linked list for storing column indices of the active row k */ 2974 nlnk = am + 1; 2975 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2976 2977 /* initial FreeSpace size is fill*(ai[am]+1) */ 2978 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2979 current_space = free_space; 2980 2981 for (k = 0; k < am; k++) { /* for each active row k */ 2982 /* initialize lnk by the column indices of row rip[k] of A */ 2983 nzk = 0; 2984 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2985 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2986 ncols_upper = 0; 2987 for (j = 0; j < ncols; j++) { 2988 i = riip[*(aj + ai[rip[k]] + j)]; 2989 if (i >= k) { /* only take upper triangular entry */ 2990 cols[ncols_upper] = i; 2991 ncols_upper++; 2992 } 2993 } 2994 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2995 nzk += nlnk; 2996 2997 /* update lnk by computing fill-in for each pivot row to be merged in */ 2998 prow = jl[k]; /* 1st pivot row */ 2999 3000 while (prow < k) { 3001 nextprow = jl[prow]; 3002 /* merge prow into k-th row */ 3003 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3004 jmax = ui[prow + 1]; 3005 ncols = jmax - jmin; 3006 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3007 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 3008 nzk += nlnk; 3009 3010 /* update il and jl for prow */ 3011 if (jmin < jmax) { 3012 il[prow] = jmin; 3013 j = *uj_ptr; 3014 jl[prow] = jl[j]; 3015 jl[j] = prow; 3016 } 3017 prow = nextprow; 3018 } 3019 3020 /* if free space is not available, make more free space */ 3021 if (current_space->local_remaining < nzk) { 3022 i = am - k + 1; /* num of unfactored rows */ 3023 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3024 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3025 reallocs++; 3026 } 3027 3028 /* copy data into free space, then initialize lnk */ 3029 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3030 3031 /* add the k-th row into il and jl */ 3032 if (nzk - 1 > 0) { 3033 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3034 jl[k] = jl[i]; 3035 jl[i] = k; 3036 il[k] = ui[k] + 1; 3037 } 3038 ui_ptr[k] = current_space->array; 3039 3040 current_space->array += nzk; 3041 current_space->local_used += nzk; 3042 current_space->local_remaining -= nzk; 3043 3044 ui[k + 1] = ui[k] + nzk; 3045 } 3046 3047 #if defined(PETSC_USE_INFO) 3048 if (ai[am] != 0) { 3049 PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]); 3050 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3051 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3052 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3053 } else { 3054 PetscCall(PetscInfo(A, "Empty matrix\n")); 3055 } 3056 #endif 3057 3058 PetscCall(ISRestoreIndices(perm, &rip)); 3059 PetscCall(ISRestoreIndices(iperm, &riip)); 3060 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3061 3062 /* destroy list of free space and other temporary array(s) */ 3063 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 3064 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3065 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3066 3067 /* put together the new matrix in MATSEQSBAIJ format */ 3068 3069 b = (Mat_SeqSBAIJ *)fact->data; 3070 b->singlemalloc = PETSC_FALSE; 3071 b->free_a = PETSC_TRUE; 3072 b->free_ij = PETSC_TRUE; 3073 3074 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 3075 3076 b->j = uj; 3077 b->i = ui; 3078 b->diag = NULL; 3079 b->ilen = NULL; 3080 b->imax = NULL; 3081 b->row = perm; 3082 b->col = perm; 3083 3084 PetscCall(PetscObjectReference((PetscObject)perm)); 3085 PetscCall(PetscObjectReference((PetscObject)perm)); 3086 3087 b->icol = iperm; 3088 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3089 3090 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3091 b->maxnz = b->nz = ui[am]; 3092 3093 fact->info.factor_mallocs = reallocs; 3094 fact->info.fill_ratio_given = fill; 3095 if (ai[am] != 0) { 3096 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 3097 } else { 3098 fact->info.fill_ratio_needed = 0.0; 3099 } 3100 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3101 PetscFunctionReturn(PETSC_SUCCESS); 3102 } 3103 #endif 3104 3105 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3106 { 3107 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3108 PetscInt n = A->rmap->n; 3109 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3110 PetscScalar *x, sum; 3111 const PetscScalar *b; 3112 const MatScalar *aa = a->a, *v; 3113 PetscInt i, nz; 3114 3115 PetscFunctionBegin; 3116 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3117 3118 PetscCall(VecGetArrayRead(bb, &b)); 3119 PetscCall(VecGetArrayWrite(xx, &x)); 3120 3121 /* forward solve the lower triangular */ 3122 x[0] = b[0]; 3123 v = aa; 3124 vi = aj; 3125 for (i = 1; i < n; i++) { 3126 nz = ai[i + 1] - ai[i]; 3127 sum = b[i]; 3128 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3129 v += nz; 3130 vi += nz; 3131 x[i] = sum; 3132 } 3133 3134 /* backward solve the upper triangular */ 3135 for (i = n - 1; i >= 0; i--) { 3136 v = aa + adiag[i + 1] + 1; 3137 vi = aj + adiag[i + 1] + 1; 3138 nz = adiag[i] - adiag[i + 1] - 1; 3139 sum = x[i]; 3140 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3141 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3142 } 3143 3144 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3145 PetscCall(VecRestoreArrayRead(bb, &b)); 3146 PetscCall(VecRestoreArrayWrite(xx, &x)); 3147 PetscFunctionReturn(PETSC_SUCCESS); 3148 } 3149 3150 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3151 { 3152 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3153 IS iscol = a->col, isrow = a->row; 3154 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3155 const PetscInt *rout, *cout, *r, *c; 3156 PetscScalar *x, *tmp, sum; 3157 const PetscScalar *b; 3158 const MatScalar *aa = a->a, *v; 3159 3160 PetscFunctionBegin; 3161 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3162 3163 PetscCall(VecGetArrayRead(bb, &b)); 3164 PetscCall(VecGetArrayWrite(xx, &x)); 3165 tmp = a->solve_work; 3166 3167 PetscCall(ISGetIndices(isrow, &rout)); 3168 r = rout; 3169 PetscCall(ISGetIndices(iscol, &cout)); 3170 c = cout; 3171 3172 /* forward solve the lower triangular */ 3173 tmp[0] = b[r[0]]; 3174 v = aa; 3175 vi = aj; 3176 for (i = 1; i < n; i++) { 3177 nz = ai[i + 1] - ai[i]; 3178 sum = b[r[i]]; 3179 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3180 tmp[i] = sum; 3181 v += nz; 3182 vi += nz; 3183 } 3184 3185 /* backward solve the upper triangular */ 3186 for (i = n - 1; i >= 0; i--) { 3187 v = aa + adiag[i + 1] + 1; 3188 vi = aj + adiag[i + 1] + 1; 3189 nz = adiag[i] - adiag[i + 1] - 1; 3190 sum = tmp[i]; 3191 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3192 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3193 } 3194 3195 PetscCall(ISRestoreIndices(isrow, &rout)); 3196 PetscCall(ISRestoreIndices(iscol, &cout)); 3197 PetscCall(VecRestoreArrayRead(bb, &b)); 3198 PetscCall(VecRestoreArrayWrite(xx, &x)); 3199 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3200 PetscFunctionReturn(PETSC_SUCCESS); 3201 } 3202 3203 #if 0 3204 // unused 3205 /* 3206 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3207 */ 3208 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3209 { 3210 Mat B = *fact; 3211 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3212 IS isicol; 3213 const PetscInt *r, *ic; 3214 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3215 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3216 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3217 PetscInt nlnk, *lnk; 3218 PetscBT lnkbt; 3219 PetscBool row_identity, icol_identity; 3220 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3221 const PetscInt *ics; 3222 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3223 PetscReal dt = info->dt, shift = info->shiftamount; 3224 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3225 PetscBool missing; 3226 3227 PetscFunctionBegin; 3228 if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3229 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3230 3231 /* symbolic factorization, can be reused */ 3232 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3233 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3234 adiag = a->diag; 3235 3236 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3237 3238 /* bdiag is location of diagonal in factor */ 3239 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3240 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3241 3242 /* allocate row pointers bi */ 3243 PetscCall(PetscMalloc1(2 * n + 2, &bi)); 3244 3245 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3246 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3247 nnz_max = ai[n] + 2 * n * dtcount + 2; 3248 3249 PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 3250 PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3251 3252 /* put together the new matrix */ 3253 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3254 b = (Mat_SeqAIJ *)B->data; 3255 3256 b->free_a = PETSC_TRUE; 3257 b->free_ij = PETSC_TRUE; 3258 b->singlemalloc = PETSC_FALSE; 3259 3260 b->a = ba; 3261 b->j = bj; 3262 b->i = bi; 3263 b->diag = bdiag; 3264 b->ilen = NULL; 3265 b->imax = NULL; 3266 b->row = isrow; 3267 b->col = iscol; 3268 PetscCall(PetscObjectReference((PetscObject)isrow)); 3269 PetscCall(PetscObjectReference((PetscObject)iscol)); 3270 b->icol = isicol; 3271 3272 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3273 b->maxnz = nnz_max; 3274 3275 B->factortype = MAT_FACTOR_ILUDT; 3276 B->info.factor_mallocs = 0; 3277 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3278 /* end of symbolic factorization */ 3279 3280 PetscCall(ISGetIndices(isrow, &r)); 3281 PetscCall(ISGetIndices(isicol, &ic)); 3282 ics = ic; 3283 3284 /* linked list for storing column indices of the active row */ 3285 nlnk = n + 1; 3286 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3287 3288 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3289 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3290 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3291 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3292 PetscCall(PetscArrayzero(rtmp, n)); 3293 3294 bi[0] = 0; 3295 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3296 bdiag_rev[n] = bdiag[0]; 3297 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3298 for (i = 0; i < n; i++) { 3299 /* copy initial fill into linked list */ 3300 nzi = ai[r[i] + 1] - ai[r[i]]; 3301 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3302 nzi_al = adiag[r[i]] - ai[r[i]]; 3303 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3304 ajtmp = aj + ai[r[i]]; 3305 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3306 3307 /* load in initial (unfactored row) */ 3308 aatmp = a->a + ai[r[i]]; 3309 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3310 3311 /* add pivot rows into linked list */ 3312 row = lnk[n]; 3313 while (row < i) { 3314 nzi_bl = bi[row + 1] - bi[row] + 1; 3315 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3316 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3317 nzi += nlnk; 3318 row = lnk[row]; 3319 } 3320 3321 /* copy data from lnk into jtmp, then initialize lnk */ 3322 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3323 3324 /* numerical factorization */ 3325 bjtmp = jtmp; 3326 row = *bjtmp++; /* 1st pivot row */ 3327 while (row < i) { 3328 pc = rtmp + row; 3329 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3330 multiplier = (*pc) * (*pv); 3331 *pc = multiplier; 3332 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3333 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3334 pv = ba + bdiag[row + 1] + 1; 3335 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3336 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3337 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3338 } 3339 row = *bjtmp++; 3340 } 3341 3342 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3343 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3344 nzi_bl = 0; 3345 j = 0; 3346 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3347 vtmp[j] = rtmp[jtmp[j]]; 3348 rtmp[jtmp[j]] = 0.0; 3349 nzi_bl++; 3350 j++; 3351 } 3352 nzi_bu = nzi - nzi_bl - 1; 3353 while (j < nzi) { 3354 vtmp[j] = rtmp[jtmp[j]]; 3355 rtmp[jtmp[j]] = 0.0; 3356 j++; 3357 } 3358 3359 bjtmp = bj + bi[i]; 3360 batmp = ba + bi[i]; 3361 /* apply level dropping rule to L part */ 3362 ncut = nzi_al + dtcount; 3363 if (ncut < nzi_bl) { 3364 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3365 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3366 } else { 3367 ncut = nzi_bl; 3368 } 3369 for (j = 0; j < ncut; j++) { 3370 bjtmp[j] = jtmp[j]; 3371 batmp[j] = vtmp[j]; 3372 } 3373 bi[i + 1] = bi[i] + ncut; 3374 nzi = ncut + 1; 3375 3376 /* apply level dropping rule to U part */ 3377 ncut = nzi_au + dtcount; 3378 if (ncut < nzi_bu) { 3379 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3380 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3381 } else { 3382 ncut = nzi_bu; 3383 } 3384 nzi += ncut; 3385 3386 /* mark bdiagonal */ 3387 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3388 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3389 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3390 bjtmp = bj + bdiag[i]; 3391 batmp = ba + bdiag[i]; 3392 *bjtmp = i; 3393 *batmp = diag_tmp; /* rtmp[i]; */ 3394 if (*batmp == 0.0) *batmp = dt + shift; 3395 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3396 3397 bjtmp = bj + bdiag[i + 1] + 1; 3398 batmp = ba + bdiag[i + 1] + 1; 3399 for (k = 0; k < ncut; k++) { 3400 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3401 batmp[k] = vtmp[nzi_bl + 1 + k]; 3402 } 3403 3404 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3405 } /* for (i=0; i<n; i++) */ 3406 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3407 3408 PetscCall(ISRestoreIndices(isrow, &r)); 3409 PetscCall(ISRestoreIndices(isicol, &ic)); 3410 3411 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3412 PetscCall(PetscFree2(im, jtmp)); 3413 PetscCall(PetscFree2(rtmp, vtmp)); 3414 PetscCall(PetscFree(bdiag_rev)); 3415 3416 PetscCall(PetscLogFlops(B->cmap->n)); 3417 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3418 3419 PetscCall(ISIdentity(isrow, &row_identity)); 3420 PetscCall(ISIdentity(isicol, &icol_identity)); 3421 if (row_identity && icol_identity) { 3422 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3423 } else { 3424 B->ops->solve = MatSolve_SeqAIJ; 3425 } 3426 3427 B->ops->solveadd = NULL; 3428 B->ops->solvetranspose = NULL; 3429 B->ops->solvetransposeadd = NULL; 3430 B->ops->matsolve = NULL; 3431 B->assembled = PETSC_TRUE; 3432 B->preallocated = PETSC_TRUE; 3433 PetscFunctionReturn(PETSC_SUCCESS); 3434 } 3435 3436 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3437 /* 3438 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3439 */ 3440 3441 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3442 { 3443 PetscFunctionBegin; 3444 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3445 PetscFunctionReturn(PETSC_SUCCESS); 3446 } 3447 3448 /* 3449 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3450 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3451 */ 3452 /* 3453 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3454 */ 3455 3456 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3457 { 3458 Mat C = fact; 3459 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3460 IS isrow = b->row, isicol = b->icol; 3461 const PetscInt *r, *ic, *ics; 3462 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3463 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3464 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3465 PetscReal dt = info->dt, shift = info->shiftamount; 3466 PetscBool row_identity, col_identity; 3467 3468 PetscFunctionBegin; 3469 PetscCall(ISGetIndices(isrow, &r)); 3470 PetscCall(ISGetIndices(isicol, &ic)); 3471 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3472 ics = ic; 3473 3474 for (i = 0; i < n; i++) { 3475 /* initialize rtmp array */ 3476 nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */ 3477 bjtmp = bj + bi[i]; 3478 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3479 rtmp[i] = 0.0; 3480 nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */ 3481 bjtmp = bj + bdiag[i + 1] + 1; 3482 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3483 3484 /* load in initial unfactored row of A */ 3485 nz = ai[r[i] + 1] - ai[r[i]]; 3486 ajtmp = aj + ai[r[i]]; 3487 v = aa + ai[r[i]]; 3488 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3489 3490 /* numerical factorization */ 3491 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3492 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3493 k = 0; 3494 while (k < nzl) { 3495 row = *bjtmp++; 3496 pc = rtmp + row; 3497 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3498 multiplier = (*pc) * (*pv); 3499 *pc = multiplier; 3500 if (PetscAbsScalar(multiplier) > dt) { 3501 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3502 pv = b->a + bdiag[row + 1] + 1; 3503 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3504 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3505 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3506 } 3507 k++; 3508 } 3509 3510 /* finished row so stick it into b->a */ 3511 /* L-part */ 3512 pv = b->a + bi[i]; 3513 pj = bj + bi[i]; 3514 nzl = bi[i + 1] - bi[i]; 3515 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3516 3517 /* diagonal: invert diagonal entries for simpler triangular solves */ 3518 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3519 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3520 3521 /* U-part */ 3522 pv = b->a + bdiag[i + 1] + 1; 3523 pj = bj + bdiag[i + 1] + 1; 3524 nzu = bdiag[i] - bdiag[i + 1] - 1; 3525 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3526 } 3527 3528 PetscCall(PetscFree(rtmp)); 3529 PetscCall(ISRestoreIndices(isicol, &ic)); 3530 PetscCall(ISRestoreIndices(isrow, &r)); 3531 3532 PetscCall(ISIdentity(isrow, &row_identity)); 3533 PetscCall(ISIdentity(isicol, &col_identity)); 3534 if (row_identity && col_identity) { 3535 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3536 } else { 3537 C->ops->solve = MatSolve_SeqAIJ; 3538 } 3539 C->ops->solveadd = NULL; 3540 C->ops->solvetranspose = NULL; 3541 C->ops->solvetransposeadd = NULL; 3542 C->ops->matsolve = NULL; 3543 C->assembled = PETSC_TRUE; 3544 C->preallocated = PETSC_TRUE; 3545 3546 PetscCall(PetscLogFlops(C->cmap->n)); 3547 PetscFunctionReturn(PETSC_SUCCESS); 3548 } 3549 #endif 3550