1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 /* 7 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8 9 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 10 */ 11 #if 0 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best, past = 0, future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i = 0; i < n; i++) { 25 future = 0.0; 26 for (j = ai[i]; j < ai[i + 1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past / future > best) { 32 best = past / future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n, &done)); 38 PetscCall(PetscArrayzero(done, n)); 39 PetscCall(PetscMalloc1(n, &order)); 40 order[0] = current; 41 for (i = 0; i < n - 1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j = ai[current]; j < ai[current + 1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k = ai[jj]; k < ai[jj + 1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past / future > best) { 57 best = past / future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k = 0; k < n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j = ai[k]; j < ai[k + 1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past / future > best) { 74 best = past / future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 80 current = newcurrent; 81 order[i + 1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 #endif 91 92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93 { 94 PetscFunctionBegin; 95 *type = MATSOLVERPETSC; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100 { 101 PetscInt n = A->rmap->n; 102 103 PetscFunctionBegin; 104 #if defined(PETSC_USE_COMPLEX) 105 if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 106 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 107 *B = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 #endif 111 112 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 113 PetscCall(MatSetSizes(*B, n, n, n, n)); 114 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 115 PetscCall(MatSetType(*B, MATSEQAIJ)); 116 117 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 118 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 119 120 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 121 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 122 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 123 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 124 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 125 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 126 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 127 128 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 129 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 130 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 131 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 132 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 133 (*B)->factortype = ftype; 134 135 PetscCall(PetscFree((*B)->solvertype)); 136 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 137 (*B)->canuseordering = PETSC_TRUE; 138 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 #if 0 143 // currently unused 144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 145 { 146 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 147 IS isicol; 148 const PetscInt *r, *ic; 149 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 150 PetscInt *bi, *bj, *ajtmp; 151 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 152 PetscReal f; 153 PetscInt nlnk, *lnk, k, **bi_ptr; 154 PetscFreeSpaceList free_space = NULL, current_space = NULL; 155 PetscBT lnkbt; 156 PetscBool missing; 157 158 PetscFunctionBegin; 159 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 160 PetscCall(MatMissingDiagonal(A, &missing, &i)); 161 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 162 163 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 164 PetscCall(ISGetIndices(isrow, &r)); 165 PetscCall(ISGetIndices(isicol, &ic)); 166 167 /* get new row pointers */ 168 PetscCall(PetscMalloc1(n + 1, &bi)); 169 bi[0] = 0; 170 171 /* bdiag is location of diagonal in factor */ 172 PetscCall(PetscMalloc1(n + 1, &bdiag)); 173 bdiag[0] = 0; 174 175 /* linked list for storing column indices of the active row */ 176 nlnk = n + 1; 177 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 178 179 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 180 181 /* initial FreeSpace size is f*(ai[n]+1) */ 182 f = info->fill; 183 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 184 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 185 current_space = free_space; 186 187 for (i = 0; i < n; i++) { 188 /* copy previous fill into linked list */ 189 nzi = 0; 190 nnz = ai[r[i] + 1] - ai[r[i]]; 191 ajtmp = aj + ai[r[i]]; 192 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 193 nzi += nlnk; 194 195 /* add pivot rows into linked list */ 196 row = lnk[n]; 197 while (row < i) { 198 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 199 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 200 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 201 nzi += nlnk; 202 row = lnk[row]; 203 } 204 bi[i + 1] = bi[i] + nzi; 205 im[i] = nzi; 206 207 /* mark bdiag */ 208 nzbd = 0; 209 nnz = nzi; 210 k = lnk[n]; 211 while (nnz-- && k < i) { 212 nzbd++; 213 k = lnk[k]; 214 } 215 bdiag[i] = bi[i] + nzbd; 216 217 /* if free space is not available, make more free space */ 218 if (current_space->local_remaining < nzi) { 219 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 220 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 221 reallocs++; 222 } 223 224 /* copy data into free space, then initialize lnk */ 225 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 226 227 bi_ptr[i] = current_space->array; 228 current_space->array += nzi; 229 current_space->local_used += nzi; 230 current_space->local_remaining -= nzi; 231 } 232 #if defined(PETSC_USE_INFO) 233 if (ai[n] != 0) { 234 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 235 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 236 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 237 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 238 PetscCall(PetscInfo(A, "for best performance.\n")); 239 } else { 240 PetscCall(PetscInfo(A, "Empty matrix\n")); 241 } 242 #endif 243 244 PetscCall(ISRestoreIndices(isrow, &r)); 245 PetscCall(ISRestoreIndices(isicol, &ic)); 246 247 /* destroy list of free space and other temporary array(s) */ 248 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 249 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 250 PetscCall(PetscLLDestroy(lnk, lnkbt)); 251 PetscCall(PetscFree2(bi_ptr, im)); 252 253 /* put together the new matrix */ 254 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 255 b = (Mat_SeqAIJ *)(B)->data; 256 257 b->free_a = PETSC_TRUE; 258 b->free_ij = PETSC_TRUE; 259 b->singlemalloc = PETSC_FALSE; 260 261 PetscCall(PetscMalloc1(bi[n] + 1, &b->a)); 262 b->j = bj; 263 b->i = bi; 264 b->diag = bdiag; 265 b->ilen = NULL; 266 b->imax = NULL; 267 b->row = isrow; 268 b->col = iscol; 269 PetscCall(PetscObjectReference((PetscObject)isrow)); 270 PetscCall(PetscObjectReference((PetscObject)iscol)); 271 b->icol = isicol; 272 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 273 274 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 275 b->maxnz = b->nz = bi[n]; 276 277 (B)->factortype = MAT_FACTOR_LU; 278 (B)->info.factor_mallocs = reallocs; 279 (B)->info.fill_ratio_given = f; 280 281 if (ai[n]) { 282 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 283 } else { 284 (B)->info.fill_ratio_needed = 0.0; 285 } 286 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 287 if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 288 PetscFunctionReturn(PETSC_SUCCESS); 289 } 290 #endif 291 292 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 293 { 294 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 295 IS isicol; 296 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 297 PetscInt i, n = A->rmap->n; 298 PetscInt *bi, *bj; 299 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 300 PetscReal f; 301 PetscInt nlnk, *lnk, k, **bi_ptr; 302 PetscFreeSpaceList free_space = NULL, current_space = NULL; 303 PetscBT lnkbt; 304 PetscBool missing; 305 306 PetscFunctionBegin; 307 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 308 PetscCall(MatMissingDiagonal(A, &missing, &i)); 309 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 310 311 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 312 PetscCall(ISGetIndices(isrow, &r)); 313 PetscCall(ISGetIndices(isicol, &ic)); 314 315 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 316 PetscCall(PetscMalloc1(n + 1, &bi)); 317 PetscCall(PetscMalloc1(n + 1, &bdiag)); 318 bi[0] = bdiag[0] = 0; 319 320 /* linked list for storing column indices of the active row */ 321 nlnk = n + 1; 322 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 323 324 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 325 326 /* initial FreeSpace size is f*(ai[n]+1) */ 327 f = info->fill; 328 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 329 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 330 current_space = free_space; 331 332 for (i = 0; i < n; i++) { 333 /* copy previous fill into linked list */ 334 nzi = 0; 335 nnz = ai[r[i] + 1] - ai[r[i]]; 336 ajtmp = aj + ai[r[i]]; 337 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 338 nzi += nlnk; 339 340 /* add pivot rows into linked list */ 341 row = lnk[n]; 342 while (row < i) { 343 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 344 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 345 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 346 nzi += nlnk; 347 row = lnk[row]; 348 } 349 bi[i + 1] = bi[i] + nzi; 350 im[i] = nzi; 351 352 /* mark bdiag */ 353 nzbd = 0; 354 nnz = nzi; 355 k = lnk[n]; 356 while (nnz-- && k < i) { 357 nzbd++; 358 k = lnk[k]; 359 } 360 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 361 362 /* if free space is not available, make more free space */ 363 if (current_space->local_remaining < nzi) { 364 /* estimated additional space needed */ 365 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 366 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 367 reallocs++; 368 } 369 370 /* copy data into free space, then initialize lnk */ 371 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 372 373 bi_ptr[i] = current_space->array; 374 current_space->array += nzi; 375 current_space->local_used += nzi; 376 current_space->local_remaining -= nzi; 377 } 378 379 PetscCall(ISRestoreIndices(isrow, &r)); 380 PetscCall(ISRestoreIndices(isicol, &ic)); 381 382 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 383 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 384 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 385 PetscCall(PetscLLDestroy(lnk, lnkbt)); 386 PetscCall(PetscFree2(bi_ptr, im)); 387 388 /* put together the new matrix */ 389 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 390 b = (Mat_SeqAIJ *)(B)->data; 391 392 b->free_a = PETSC_TRUE; 393 b->free_ij = PETSC_TRUE; 394 b->singlemalloc = PETSC_FALSE; 395 396 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 397 398 b->j = bj; 399 b->i = bi; 400 b->diag = bdiag; 401 b->ilen = NULL; 402 b->imax = NULL; 403 b->row = isrow; 404 b->col = iscol; 405 PetscCall(PetscObjectReference((PetscObject)isrow)); 406 PetscCall(PetscObjectReference((PetscObject)iscol)); 407 b->icol = isicol; 408 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 409 410 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 411 b->maxnz = b->nz = bdiag[0] + 1; 412 413 B->factortype = MAT_FACTOR_LU; 414 B->info.factor_mallocs = reallocs; 415 B->info.fill_ratio_given = f; 416 417 if (ai[n]) { 418 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 419 } else { 420 B->info.fill_ratio_needed = 0.0; 421 } 422 #if defined(PETSC_USE_INFO) 423 if (ai[n] != 0) { 424 PetscReal af = B->info.fill_ratio_needed; 425 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 426 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 427 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 428 PetscCall(PetscInfo(A, "for best performance.\n")); 429 } else { 430 PetscCall(PetscInfo(A, "Empty matrix\n")); 431 } 432 #endif 433 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 434 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 435 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 436 PetscFunctionReturn(PETSC_SUCCESS); 437 } 438 439 /* 440 Trouble in factorization, should we dump the original matrix? 441 */ 442 PetscErrorCode MatFactorDumpMatrix(Mat A) 443 { 444 PetscBool flg = PETSC_FALSE; 445 446 PetscFunctionBegin; 447 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 448 if (flg) { 449 PetscViewer viewer; 450 char filename[PETSC_MAX_PATH_LEN]; 451 452 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 453 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 454 PetscCall(MatView(A, viewer)); 455 PetscCall(PetscViewerDestroy(&viewer)); 456 } 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 461 { 462 Mat C = B; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 464 IS isrow = b->row, isicol = b->icol; 465 const PetscInt *r, *ic, *ics; 466 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 467 PetscInt i, j, k, nz, nzL, row, *pj; 468 const PetscInt *ajtmp, *bjtmp; 469 MatScalar *rtmp, *pc, multiplier, *pv; 470 const MatScalar *aa = a->a, *v; 471 PetscBool row_identity, col_identity; 472 FactorShiftCtx sctx; 473 const PetscInt *ddiag; 474 PetscReal rs; 475 MatScalar d; 476 477 PetscFunctionBegin; 478 /* MatPivotSetUp(): initialize shift context sctx */ 479 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 480 481 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 482 ddiag = a->diag; 483 sctx.shift_top = info->zeropivot; 484 for (i = 0; i < n; i++) { 485 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 486 d = (aa)[ddiag[i]]; 487 rs = -PetscAbsScalar(d) - PetscRealPart(d); 488 v = aa + ai[i]; 489 nz = ai[i + 1] - ai[i]; 490 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 491 if (rs > sctx.shift_top) sctx.shift_top = rs; 492 } 493 sctx.shift_top *= 1.1; 494 sctx.nshift_max = 5; 495 sctx.shift_lo = 0.; 496 sctx.shift_hi = 1.; 497 } 498 499 PetscCall(ISGetIndices(isrow, &r)); 500 PetscCall(ISGetIndices(isicol, &ic)); 501 PetscCall(PetscMalloc1(n + 1, &rtmp)); 502 ics = ic; 503 504 do { 505 sctx.newshift = PETSC_FALSE; 506 for (i = 0; i < n; i++) { 507 /* zero rtmp */ 508 /* L part */ 509 nz = bi[i + 1] - bi[i]; 510 bjtmp = bj + bi[i]; 511 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 512 513 /* U part */ 514 nz = bdiag[i] - bdiag[i + 1]; 515 bjtmp = bj + bdiag[i + 1] + 1; 516 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 517 518 /* load in initial (unfactored row) */ 519 nz = ai[r[i] + 1] - ai[r[i]]; 520 ajtmp = aj + ai[r[i]]; 521 v = aa + ai[r[i]]; 522 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 523 /* ZeropivotApply() */ 524 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 525 526 /* elimination */ 527 bjtmp = bj + bi[i]; 528 row = *bjtmp++; 529 nzL = bi[i + 1] - bi[i]; 530 for (k = 0; k < nzL; k++) { 531 pc = rtmp + row; 532 if (*pc != 0.0) { 533 pv = b->a + bdiag[row]; 534 multiplier = *pc * (*pv); 535 *pc = multiplier; 536 537 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 538 pv = b->a + bdiag[row + 1] + 1; 539 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 540 541 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 542 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 543 } 544 row = *bjtmp++; 545 } 546 547 /* finished row so stick it into b->a */ 548 rs = 0.0; 549 /* L part */ 550 pv = b->a + bi[i]; 551 pj = b->j + bi[i]; 552 nz = bi[i + 1] - bi[i]; 553 for (j = 0; j < nz; j++) { 554 pv[j] = rtmp[pj[j]]; 555 rs += PetscAbsScalar(pv[j]); 556 } 557 558 /* U part */ 559 pv = b->a + bdiag[i + 1] + 1; 560 pj = b->j + bdiag[i + 1] + 1; 561 nz = bdiag[i] - bdiag[i + 1] - 1; 562 for (j = 0; j < nz; j++) { 563 pv[j] = rtmp[pj[j]]; 564 rs += PetscAbsScalar(pv[j]); 565 } 566 567 sctx.rs = rs; 568 sctx.pv = rtmp[i]; 569 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 570 if (sctx.newshift) break; /* break for-loop */ 571 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 572 573 /* Mark diagonal and invert diagonal for simpler triangular solves */ 574 pv = b->a + bdiag[i]; 575 *pv = 1.0 / rtmp[i]; 576 577 } /* endof for (i=0; i<n; i++) { */ 578 579 /* MatPivotRefine() */ 580 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 581 /* 582 * if no shift in this attempt & shifting & started shifting & can refine, 583 * then try lower shift 584 */ 585 sctx.shift_hi = sctx.shift_fraction; 586 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 587 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 588 sctx.newshift = PETSC_TRUE; 589 sctx.nshift++; 590 } 591 } while (sctx.newshift); 592 593 PetscCall(PetscFree(rtmp)); 594 PetscCall(ISRestoreIndices(isicol, &ic)); 595 PetscCall(ISRestoreIndices(isrow, &r)); 596 597 PetscCall(ISIdentity(isrow, &row_identity)); 598 PetscCall(ISIdentity(isicol, &col_identity)); 599 if (b->inode.size) { 600 C->ops->solve = MatSolve_SeqAIJ_Inode; 601 } else if (row_identity && col_identity) { 602 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 603 } else { 604 C->ops->solve = MatSolve_SeqAIJ; 605 } 606 C->ops->solveadd = MatSolveAdd_SeqAIJ; 607 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 608 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 609 C->ops->matsolve = MatMatSolve_SeqAIJ; 610 C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 611 C->assembled = PETSC_TRUE; 612 C->preallocated = PETSC_TRUE; 613 614 PetscCall(PetscLogFlops(C->cmap->n)); 615 616 /* MatShiftView(A,info,&sctx) */ 617 if (sctx.nshift) { 618 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 619 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 620 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 621 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 622 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 623 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 624 } 625 } 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 629 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 630 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 631 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 632 633 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 634 { 635 Mat C = B; 636 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 637 IS isrow = b->row, isicol = b->icol; 638 const PetscInt *r, *ic, *ics; 639 PetscInt nz, row, i, j, n = A->rmap->n, diag; 640 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 641 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 642 MatScalar *pv, *rtmp, *pc, multiplier, d; 643 const MatScalar *v, *aa = a->a; 644 PetscReal rs = 0.0; 645 FactorShiftCtx sctx; 646 const PetscInt *ddiag; 647 PetscBool row_identity, col_identity; 648 649 PetscFunctionBegin; 650 /* MatPivotSetUp(): initialize shift context sctx */ 651 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 652 653 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 654 ddiag = a->diag; 655 sctx.shift_top = info->zeropivot; 656 for (i = 0; i < n; i++) { 657 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 658 d = (aa)[ddiag[i]]; 659 rs = -PetscAbsScalar(d) - PetscRealPart(d); 660 v = aa + ai[i]; 661 nz = ai[i + 1] - ai[i]; 662 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 663 if (rs > sctx.shift_top) sctx.shift_top = rs; 664 } 665 sctx.shift_top *= 1.1; 666 sctx.nshift_max = 5; 667 sctx.shift_lo = 0.; 668 sctx.shift_hi = 1.; 669 } 670 671 PetscCall(ISGetIndices(isrow, &r)); 672 PetscCall(ISGetIndices(isicol, &ic)); 673 PetscCall(PetscMalloc1(n + 1, &rtmp)); 674 ics = ic; 675 676 do { 677 sctx.newshift = PETSC_FALSE; 678 for (i = 0; i < n; i++) { 679 nz = bi[i + 1] - bi[i]; 680 bjtmp = bj + bi[i]; 681 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 682 683 /* load in initial (unfactored row) */ 684 nz = ai[r[i] + 1] - ai[r[i]]; 685 ajtmp = aj + ai[r[i]]; 686 v = aa + ai[r[i]]; 687 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 688 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 689 690 row = *bjtmp++; 691 while (row < i) { 692 pc = rtmp + row; 693 if (*pc != 0.0) { 694 pv = b->a + diag_offset[row]; 695 pj = b->j + diag_offset[row] + 1; 696 multiplier = *pc / *pv++; 697 *pc = multiplier; 698 nz = bi[row + 1] - diag_offset[row] - 1; 699 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 700 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 701 } 702 row = *bjtmp++; 703 } 704 /* finished row so stick it into b->a */ 705 pv = b->a + bi[i]; 706 pj = b->j + bi[i]; 707 nz = bi[i + 1] - bi[i]; 708 diag = diag_offset[i] - bi[i]; 709 rs = 0.0; 710 for (j = 0; j < nz; j++) { 711 pv[j] = rtmp[pj[j]]; 712 rs += PetscAbsScalar(pv[j]); 713 } 714 rs -= PetscAbsScalar(pv[diag]); 715 716 sctx.rs = rs; 717 sctx.pv = pv[diag]; 718 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 719 if (sctx.newshift) break; 720 pv[diag] = sctx.pv; 721 } 722 723 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 724 /* 725 * if no shift in this attempt & shifting & started shifting & can refine, 726 * then try lower shift 727 */ 728 sctx.shift_hi = sctx.shift_fraction; 729 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 730 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 731 sctx.newshift = PETSC_TRUE; 732 sctx.nshift++; 733 } 734 } while (sctx.newshift); 735 736 /* invert diagonal entries for simpler triangular solves */ 737 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 738 PetscCall(PetscFree(rtmp)); 739 PetscCall(ISRestoreIndices(isicol, &ic)); 740 PetscCall(ISRestoreIndices(isrow, &r)); 741 742 PetscCall(ISIdentity(isrow, &row_identity)); 743 PetscCall(ISIdentity(isicol, &col_identity)); 744 if (row_identity && col_identity) { 745 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 746 } else { 747 C->ops->solve = MatSolve_SeqAIJ_inplace; 748 } 749 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 750 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 751 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 752 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 753 C->ops->matsolvetranspose = NULL; 754 755 C->assembled = PETSC_TRUE; 756 C->preallocated = PETSC_TRUE; 757 758 PetscCall(PetscLogFlops(C->cmap->n)); 759 if (sctx.nshift) { 760 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 761 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 762 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 763 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 764 } 765 } 766 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 767 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 768 769 PetscCall(MatSeqAIJCheckInode(C)); 770 PetscFunctionReturn(PETSC_SUCCESS); 771 } 772 773 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 774 775 /* 776 This routine implements inplace ILU(0) with row or/and column permutations. 777 Input: 778 A - original matrix 779 Output; 780 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 781 a->j (col index) is permuted by the inverse of colperm, then sorted 782 a->a reordered accordingly with a->j 783 a->diag (ptr to diagonal elements) is updated. 784 */ 785 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 786 { 787 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 788 IS isrow = a->row, isicol = a->icol; 789 const PetscInt *r, *ic, *ics; 790 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 791 PetscInt *ajtmp, nz, row; 792 PetscInt *diag = a->diag, nbdiag, *pj; 793 PetscScalar *rtmp, *pc, multiplier, d; 794 MatScalar *pv, *v; 795 PetscReal rs; 796 FactorShiftCtx sctx; 797 const MatScalar *aa = a->a, *vtmp; 798 799 PetscFunctionBegin; 800 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 801 802 /* MatPivotSetUp(): initialize shift context sctx */ 803 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 804 805 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 806 const PetscInt *ddiag = a->diag; 807 sctx.shift_top = info->zeropivot; 808 for (i = 0; i < n; i++) { 809 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 810 d = (aa)[ddiag[i]]; 811 rs = -PetscAbsScalar(d) - PetscRealPart(d); 812 vtmp = aa + ai[i]; 813 nz = ai[i + 1] - ai[i]; 814 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 815 if (rs > sctx.shift_top) sctx.shift_top = rs; 816 } 817 sctx.shift_top *= 1.1; 818 sctx.nshift_max = 5; 819 sctx.shift_lo = 0.; 820 sctx.shift_hi = 1.; 821 } 822 823 PetscCall(ISGetIndices(isrow, &r)); 824 PetscCall(ISGetIndices(isicol, &ic)); 825 PetscCall(PetscMalloc1(n + 1, &rtmp)); 826 PetscCall(PetscArrayzero(rtmp, n + 1)); 827 ics = ic; 828 829 #if defined(MV) 830 sctx.shift_top = 0.; 831 sctx.nshift_max = 0; 832 sctx.shift_lo = 0.; 833 sctx.shift_hi = 0.; 834 sctx.shift_fraction = 0.; 835 836 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 837 sctx.shift_top = 0.; 838 for (i = 0; i < n; i++) { 839 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 840 d = (a->a)[diag[i]]; 841 rs = -PetscAbsScalar(d) - PetscRealPart(d); 842 v = a->a + ai[i]; 843 nz = ai[i + 1] - ai[i]; 844 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 845 if (rs > sctx.shift_top) sctx.shift_top = rs; 846 } 847 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 848 sctx.shift_top *= 1.1; 849 sctx.nshift_max = 5; 850 sctx.shift_lo = 0.; 851 sctx.shift_hi = 1.; 852 } 853 854 sctx.shift_amount = 0.; 855 sctx.nshift = 0; 856 #endif 857 858 do { 859 sctx.newshift = PETSC_FALSE; 860 for (i = 0; i < n; i++) { 861 /* load in initial unfactored row */ 862 nz = ai[r[i] + 1] - ai[r[i]]; 863 ajtmp = aj + ai[r[i]]; 864 v = a->a + ai[r[i]]; 865 /* sort permuted ajtmp and values v accordingly */ 866 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 867 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 868 869 diag[r[i]] = ai[r[i]]; 870 for (j = 0; j < nz; j++) { 871 rtmp[ajtmp[j]] = v[j]; 872 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 873 } 874 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 875 876 row = *ajtmp++; 877 while (row < i) { 878 pc = rtmp + row; 879 if (*pc != 0.0) { 880 pv = a->a + diag[r[row]]; 881 pj = aj + diag[r[row]] + 1; 882 883 multiplier = *pc / *pv++; 884 *pc = multiplier; 885 nz = ai[r[row] + 1] - diag[r[row]] - 1; 886 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 887 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 888 } 889 row = *ajtmp++; 890 } 891 /* finished row so overwrite it onto a->a */ 892 pv = a->a + ai[r[i]]; 893 pj = aj + ai[r[i]]; 894 nz = ai[r[i] + 1] - ai[r[i]]; 895 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 896 897 rs = 0.0; 898 for (j = 0; j < nz; j++) { 899 pv[j] = rtmp[pj[j]]; 900 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 901 } 902 903 sctx.rs = rs; 904 sctx.pv = pv[nbdiag]; 905 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 906 if (sctx.newshift) break; 907 pv[nbdiag] = sctx.pv; 908 } 909 910 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 911 /* 912 * if no shift in this attempt & shifting & started shifting & can refine, 913 * then try lower shift 914 */ 915 sctx.shift_hi = sctx.shift_fraction; 916 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 917 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 918 sctx.newshift = PETSC_TRUE; 919 sctx.nshift++; 920 } 921 } while (sctx.newshift); 922 923 /* invert diagonal entries for simpler triangular solves */ 924 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 925 926 PetscCall(PetscFree(rtmp)); 927 PetscCall(ISRestoreIndices(isicol, &ic)); 928 PetscCall(ISRestoreIndices(isrow, &r)); 929 930 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 931 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 932 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 933 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 934 935 A->assembled = PETSC_TRUE; 936 A->preallocated = PETSC_TRUE; 937 938 PetscCall(PetscLogFlops(A->cmap->n)); 939 if (sctx.nshift) { 940 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 941 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 942 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 943 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 944 } 945 } 946 PetscFunctionReturn(PETSC_SUCCESS); 947 } 948 949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 950 { 951 Mat C; 952 953 PetscFunctionBegin; 954 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 955 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 956 PetscCall(MatLUFactorNumeric(C, A, info)); 957 958 A->ops->solve = C->ops->solve; 959 A->ops->solvetranspose = C->ops->solvetranspose; 960 961 PetscCall(MatHeaderMerge(A, &C)); 962 PetscFunctionReturn(PETSC_SUCCESS); 963 } 964 965 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 966 { 967 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 968 IS iscol = a->col, isrow = a->row; 969 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 970 PetscInt nz; 971 const PetscInt *rout, *cout, *r, *c; 972 PetscScalar *x, *tmp, *tmps, sum; 973 const PetscScalar *b; 974 const MatScalar *aa = a->a, *v; 975 976 PetscFunctionBegin; 977 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 978 979 PetscCall(VecGetArrayRead(bb, &b)); 980 PetscCall(VecGetArrayWrite(xx, &x)); 981 tmp = a->solve_work; 982 983 PetscCall(ISGetIndices(isrow, &rout)); 984 r = rout; 985 PetscCall(ISGetIndices(iscol, &cout)); 986 c = cout + (n - 1); 987 988 /* forward solve the lower triangular */ 989 tmp[0] = b[*r++]; 990 tmps = tmp; 991 for (i = 1; i < n; i++) { 992 v = aa + ai[i]; 993 vi = aj + ai[i]; 994 nz = a->diag[i] - ai[i]; 995 sum = b[*r++]; 996 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 997 tmp[i] = sum; 998 } 999 1000 /* backward solve the upper triangular */ 1001 for (i = n - 1; i >= 0; i--) { 1002 v = aa + a->diag[i] + 1; 1003 vi = aj + a->diag[i] + 1; 1004 nz = ai[i + 1] - a->diag[i] - 1; 1005 sum = tmp[i]; 1006 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1007 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 1008 } 1009 1010 PetscCall(ISRestoreIndices(isrow, &rout)); 1011 PetscCall(ISRestoreIndices(iscol, &cout)); 1012 PetscCall(VecRestoreArrayRead(bb, &b)); 1013 PetscCall(VecRestoreArrayWrite(xx, &x)); 1014 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1015 PetscFunctionReturn(PETSC_SUCCESS); 1016 } 1017 1018 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1019 { 1020 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1021 IS iscol = a->col, isrow = a->row; 1022 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1023 PetscInt nz, neq, ldb, ldx; 1024 const PetscInt *rout, *cout, *r, *c; 1025 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1026 const PetscScalar *b, *aa = a->a, *v; 1027 PetscBool isdense; 1028 1029 PetscFunctionBegin; 1030 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1031 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1032 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1033 if (X != B) { 1034 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1035 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1036 } 1037 PetscCall(MatDenseGetArrayRead(B, &b)); 1038 PetscCall(MatDenseGetLDA(B, &ldb)); 1039 PetscCall(MatDenseGetArray(X, &x)); 1040 PetscCall(MatDenseGetLDA(X, &ldx)); 1041 PetscCall(ISGetIndices(isrow, &rout)); 1042 r = rout; 1043 PetscCall(ISGetIndices(iscol, &cout)); 1044 c = cout; 1045 for (neq = 0; neq < B->cmap->n; neq++) { 1046 /* forward solve the lower triangular */ 1047 tmp[0] = b[r[0]]; 1048 tmps = tmp; 1049 for (i = 1; i < n; i++) { 1050 v = aa + ai[i]; 1051 vi = aj + ai[i]; 1052 nz = a->diag[i] - ai[i]; 1053 sum = b[r[i]]; 1054 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1055 tmp[i] = sum; 1056 } 1057 /* backward solve the upper triangular */ 1058 for (i = n - 1; i >= 0; i--) { 1059 v = aa + a->diag[i] + 1; 1060 vi = aj + a->diag[i] + 1; 1061 nz = ai[i + 1] - a->diag[i] - 1; 1062 sum = tmp[i]; 1063 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1064 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1065 } 1066 b += ldb; 1067 x += ldx; 1068 } 1069 PetscCall(ISRestoreIndices(isrow, &rout)); 1070 PetscCall(ISRestoreIndices(iscol, &cout)); 1071 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1072 PetscCall(MatDenseRestoreArray(X, &x)); 1073 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1078 { 1079 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1080 IS iscol = a->col, isrow = a->row; 1081 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1082 PetscInt nz, neq, ldb, ldx; 1083 const PetscInt *rout, *cout, *r, *c; 1084 PetscScalar *x, *tmp = a->solve_work, sum; 1085 const PetscScalar *b, *aa = a->a, *v; 1086 PetscBool isdense; 1087 1088 PetscFunctionBegin; 1089 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1090 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1091 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1092 if (X != B) { 1093 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1094 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1095 } 1096 PetscCall(MatDenseGetArrayRead(B, &b)); 1097 PetscCall(MatDenseGetLDA(B, &ldb)); 1098 PetscCall(MatDenseGetArray(X, &x)); 1099 PetscCall(MatDenseGetLDA(X, &ldx)); 1100 PetscCall(ISGetIndices(isrow, &rout)); 1101 r = rout; 1102 PetscCall(ISGetIndices(iscol, &cout)); 1103 c = cout; 1104 for (neq = 0; neq < B->cmap->n; neq++) { 1105 /* forward solve the lower triangular */ 1106 tmp[0] = b[r[0]]; 1107 v = aa; 1108 vi = aj; 1109 for (i = 1; i < n; i++) { 1110 nz = ai[i + 1] - ai[i]; 1111 sum = b[r[i]]; 1112 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1113 tmp[i] = sum; 1114 v += nz; 1115 vi += nz; 1116 } 1117 /* backward solve the upper triangular */ 1118 for (i = n - 1; i >= 0; i--) { 1119 v = aa + adiag[i + 1] + 1; 1120 vi = aj + adiag[i + 1] + 1; 1121 nz = adiag[i] - adiag[i + 1] - 1; 1122 sum = tmp[i]; 1123 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1124 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1125 } 1126 b += ldb; 1127 x += ldx; 1128 } 1129 PetscCall(ISRestoreIndices(isrow, &rout)); 1130 PetscCall(ISRestoreIndices(iscol, &cout)); 1131 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1132 PetscCall(MatDenseRestoreArray(X, &x)); 1133 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1134 PetscFunctionReturn(PETSC_SUCCESS); 1135 } 1136 1137 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 1138 { 1139 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1140 IS iscol = a->col, isrow = a->row; 1141 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j; 1142 PetscInt nz, neq, ldb, ldx; 1143 const PetscInt *rout, *cout, *r, *c; 1144 PetscScalar *x, *tmp = a->solve_work, s1; 1145 const PetscScalar *b, *aa = a->a, *v; 1146 PetscBool isdense; 1147 1148 PetscFunctionBegin; 1149 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1150 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1151 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1152 if (X != B) { 1153 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1154 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1155 } 1156 PetscCall(MatDenseGetArrayRead(B, &b)); 1157 PetscCall(MatDenseGetLDA(B, &ldb)); 1158 PetscCall(MatDenseGetArray(X, &x)); 1159 PetscCall(MatDenseGetLDA(X, &ldx)); 1160 tmp = a->solve_work; 1161 PetscCall(ISGetIndices(isrow, &rout)); 1162 r = rout; 1163 PetscCall(ISGetIndices(iscol, &cout)); 1164 c = cout; 1165 for (neq = 0; neq < B->cmap->n; neq++) { 1166 /* copy the b into temp work space according to permutation */ 1167 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1168 1169 /* forward solve the U^T */ 1170 for (i = 0; i < n; i++) { 1171 v = aa + adiag[i + 1] + 1; 1172 vi = aj + adiag[i + 1] + 1; 1173 nz = adiag[i] - adiag[i + 1] - 1; 1174 s1 = tmp[i]; 1175 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1176 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1177 tmp[i] = s1; 1178 } 1179 1180 /* backward solve the L^T */ 1181 for (i = n - 1; i >= 0; i--) { 1182 v = aa + ai[i]; 1183 vi = aj + ai[i]; 1184 nz = ai[i + 1] - ai[i]; 1185 s1 = tmp[i]; 1186 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1187 } 1188 1189 /* copy tmp into x according to permutation */ 1190 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1191 b += ldb; 1192 x += ldx; 1193 } 1194 PetscCall(ISRestoreIndices(isrow, &rout)); 1195 PetscCall(ISRestoreIndices(iscol, &cout)); 1196 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1197 PetscCall(MatDenseRestoreArray(X, &x)); 1198 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1199 PetscFunctionReturn(PETSC_SUCCESS); 1200 } 1201 1202 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1203 { 1204 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1205 IS iscol = a->col, isrow = a->row; 1206 const PetscInt *r, *c, *rout, *cout; 1207 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1208 PetscInt nz, row; 1209 PetscScalar *x, *tmp, *tmps, sum; 1210 const PetscScalar *b; 1211 const MatScalar *aa = a->a, *v; 1212 1213 PetscFunctionBegin; 1214 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1215 1216 PetscCall(VecGetArrayRead(bb, &b)); 1217 PetscCall(VecGetArrayWrite(xx, &x)); 1218 tmp = a->solve_work; 1219 1220 PetscCall(ISGetIndices(isrow, &rout)); 1221 r = rout; 1222 PetscCall(ISGetIndices(iscol, &cout)); 1223 c = cout + (n - 1); 1224 1225 /* forward solve the lower triangular */ 1226 tmp[0] = b[*r++]; 1227 tmps = tmp; 1228 for (row = 1; row < n; row++) { 1229 i = rout[row]; /* permuted row */ 1230 v = aa + ai[i]; 1231 vi = aj + ai[i]; 1232 nz = a->diag[i] - ai[i]; 1233 sum = b[*r++]; 1234 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1235 tmp[row] = sum; 1236 } 1237 1238 /* backward solve the upper triangular */ 1239 for (row = n - 1; row >= 0; row--) { 1240 i = rout[row]; /* permuted row */ 1241 v = aa + a->diag[i] + 1; 1242 vi = aj + a->diag[i] + 1; 1243 nz = ai[i + 1] - a->diag[i] - 1; 1244 sum = tmp[row]; 1245 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1246 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1247 } 1248 1249 PetscCall(ISRestoreIndices(isrow, &rout)); 1250 PetscCall(ISRestoreIndices(iscol, &cout)); 1251 PetscCall(VecRestoreArrayRead(bb, &b)); 1252 PetscCall(VecRestoreArrayWrite(xx, &x)); 1253 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1254 PetscFunctionReturn(PETSC_SUCCESS); 1255 } 1256 1257 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1258 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1259 { 1260 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1261 PetscInt n = A->rmap->n; 1262 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1263 PetscScalar *x; 1264 const PetscScalar *b; 1265 const MatScalar *aa = a->a; 1266 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1267 PetscInt adiag_i, i, nz, ai_i; 1268 const PetscInt *vi; 1269 const MatScalar *v; 1270 PetscScalar sum; 1271 #endif 1272 1273 PetscFunctionBegin; 1274 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1275 1276 PetscCall(VecGetArrayRead(bb, &b)); 1277 PetscCall(VecGetArrayWrite(xx, &x)); 1278 1279 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1280 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1281 #else 1282 /* forward solve the lower triangular */ 1283 x[0] = b[0]; 1284 for (i = 1; i < n; i++) { 1285 ai_i = ai[i]; 1286 v = aa + ai_i; 1287 vi = aj + ai_i; 1288 nz = adiag[i] - ai_i; 1289 sum = b[i]; 1290 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1291 x[i] = sum; 1292 } 1293 1294 /* backward solve the upper triangular */ 1295 for (i = n - 1; i >= 0; i--) { 1296 adiag_i = adiag[i]; 1297 v = aa + adiag_i + 1; 1298 vi = aj + adiag_i + 1; 1299 nz = ai[i + 1] - adiag_i - 1; 1300 sum = x[i]; 1301 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1302 x[i] = sum * aa[adiag_i]; 1303 } 1304 #endif 1305 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1306 PetscCall(VecRestoreArrayRead(bb, &b)); 1307 PetscCall(VecRestoreArrayWrite(xx, &x)); 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 1311 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1312 { 1313 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1314 IS iscol = a->col, isrow = a->row; 1315 PetscInt i, n = A->rmap->n, j; 1316 PetscInt nz; 1317 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1318 PetscScalar *x, *tmp, sum; 1319 const PetscScalar *b; 1320 const MatScalar *aa = a->a, *v; 1321 1322 PetscFunctionBegin; 1323 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1324 1325 PetscCall(VecGetArrayRead(bb, &b)); 1326 PetscCall(VecGetArray(xx, &x)); 1327 tmp = a->solve_work; 1328 1329 PetscCall(ISGetIndices(isrow, &rout)); 1330 r = rout; 1331 PetscCall(ISGetIndices(iscol, &cout)); 1332 c = cout + (n - 1); 1333 1334 /* forward solve the lower triangular */ 1335 tmp[0] = b[*r++]; 1336 for (i = 1; i < n; i++) { 1337 v = aa + ai[i]; 1338 vi = aj + ai[i]; 1339 nz = a->diag[i] - ai[i]; 1340 sum = b[*r++]; 1341 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1342 tmp[i] = sum; 1343 } 1344 1345 /* backward solve the upper triangular */ 1346 for (i = n - 1; i >= 0; i--) { 1347 v = aa + a->diag[i] + 1; 1348 vi = aj + a->diag[i] + 1; 1349 nz = ai[i + 1] - a->diag[i] - 1; 1350 sum = tmp[i]; 1351 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1352 tmp[i] = sum * aa[a->diag[i]]; 1353 x[*c--] += tmp[i]; 1354 } 1355 1356 PetscCall(ISRestoreIndices(isrow, &rout)); 1357 PetscCall(ISRestoreIndices(iscol, &cout)); 1358 PetscCall(VecRestoreArrayRead(bb, &b)); 1359 PetscCall(VecRestoreArray(xx, &x)); 1360 PetscCall(PetscLogFlops(2.0 * a->nz)); 1361 PetscFunctionReturn(PETSC_SUCCESS); 1362 } 1363 1364 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1365 { 1366 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1367 IS iscol = a->col, isrow = a->row; 1368 PetscInt i, n = A->rmap->n, j; 1369 PetscInt nz; 1370 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1371 PetscScalar *x, *tmp, sum; 1372 const PetscScalar *b; 1373 const MatScalar *aa = a->a, *v; 1374 1375 PetscFunctionBegin; 1376 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1377 1378 PetscCall(VecGetArrayRead(bb, &b)); 1379 PetscCall(VecGetArray(xx, &x)); 1380 tmp = a->solve_work; 1381 1382 PetscCall(ISGetIndices(isrow, &rout)); 1383 r = rout; 1384 PetscCall(ISGetIndices(iscol, &cout)); 1385 c = cout; 1386 1387 /* forward solve the lower triangular */ 1388 tmp[0] = b[r[0]]; 1389 v = aa; 1390 vi = aj; 1391 for (i = 1; i < n; i++) { 1392 nz = ai[i + 1] - ai[i]; 1393 sum = b[r[i]]; 1394 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1395 tmp[i] = sum; 1396 v += nz; 1397 vi += nz; 1398 } 1399 1400 /* backward solve the upper triangular */ 1401 v = aa + adiag[n - 1]; 1402 vi = aj + adiag[n - 1]; 1403 for (i = n - 1; i >= 0; i--) { 1404 nz = adiag[i] - adiag[i + 1] - 1; 1405 sum = tmp[i]; 1406 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1407 tmp[i] = sum * v[nz]; 1408 x[c[i]] += tmp[i]; 1409 v += nz + 1; 1410 vi += nz + 1; 1411 } 1412 1413 PetscCall(ISRestoreIndices(isrow, &rout)); 1414 PetscCall(ISRestoreIndices(iscol, &cout)); 1415 PetscCall(VecRestoreArrayRead(bb, &b)); 1416 PetscCall(VecRestoreArray(xx, &x)); 1417 PetscCall(PetscLogFlops(2.0 * a->nz)); 1418 PetscFunctionReturn(PETSC_SUCCESS); 1419 } 1420 1421 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1422 { 1423 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1424 IS iscol = a->col, isrow = a->row; 1425 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1426 PetscInt i, n = A->rmap->n, j; 1427 PetscInt nz; 1428 PetscScalar *x, *tmp, s1; 1429 const MatScalar *aa = a->a, *v; 1430 const PetscScalar *b; 1431 1432 PetscFunctionBegin; 1433 PetscCall(VecGetArrayRead(bb, &b)); 1434 PetscCall(VecGetArrayWrite(xx, &x)); 1435 tmp = a->solve_work; 1436 1437 PetscCall(ISGetIndices(isrow, &rout)); 1438 r = rout; 1439 PetscCall(ISGetIndices(iscol, &cout)); 1440 c = cout; 1441 1442 /* copy the b into temp work space according to permutation */ 1443 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1444 1445 /* forward solve the U^T */ 1446 for (i = 0; i < n; i++) { 1447 v = aa + diag[i]; 1448 vi = aj + diag[i] + 1; 1449 nz = ai[i + 1] - diag[i] - 1; 1450 s1 = tmp[i]; 1451 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1452 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1453 tmp[i] = s1; 1454 } 1455 1456 /* backward solve the L^T */ 1457 for (i = n - 1; i >= 0; i--) { 1458 v = aa + diag[i] - 1; 1459 vi = aj + diag[i] - 1; 1460 nz = diag[i] - ai[i]; 1461 s1 = tmp[i]; 1462 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1463 } 1464 1465 /* copy tmp into x according to permutation */ 1466 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1467 1468 PetscCall(ISRestoreIndices(isrow, &rout)); 1469 PetscCall(ISRestoreIndices(iscol, &cout)); 1470 PetscCall(VecRestoreArrayRead(bb, &b)); 1471 PetscCall(VecRestoreArrayWrite(xx, &x)); 1472 1473 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1474 PetscFunctionReturn(PETSC_SUCCESS); 1475 } 1476 1477 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1478 { 1479 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1480 IS iscol = a->col, isrow = a->row; 1481 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1482 PetscInt i, n = A->rmap->n, j; 1483 PetscInt nz; 1484 PetscScalar *x, *tmp, s1; 1485 const MatScalar *aa = a->a, *v; 1486 const PetscScalar *b; 1487 1488 PetscFunctionBegin; 1489 PetscCall(VecGetArrayRead(bb, &b)); 1490 PetscCall(VecGetArrayWrite(xx, &x)); 1491 tmp = a->solve_work; 1492 1493 PetscCall(ISGetIndices(isrow, &rout)); 1494 r = rout; 1495 PetscCall(ISGetIndices(iscol, &cout)); 1496 c = cout; 1497 1498 /* copy the b into temp work space according to permutation */ 1499 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1500 1501 /* forward solve the U^T */ 1502 for (i = 0; i < n; i++) { 1503 v = aa + adiag[i + 1] + 1; 1504 vi = aj + adiag[i + 1] + 1; 1505 nz = adiag[i] - adiag[i + 1] - 1; 1506 s1 = tmp[i]; 1507 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1508 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1509 tmp[i] = s1; 1510 } 1511 1512 /* backward solve the L^T */ 1513 for (i = n - 1; i >= 0; i--) { 1514 v = aa + ai[i]; 1515 vi = aj + ai[i]; 1516 nz = ai[i + 1] - ai[i]; 1517 s1 = tmp[i]; 1518 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1519 } 1520 1521 /* copy tmp into x according to permutation */ 1522 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1523 1524 PetscCall(ISRestoreIndices(isrow, &rout)); 1525 PetscCall(ISRestoreIndices(iscol, &cout)); 1526 PetscCall(VecRestoreArrayRead(bb, &b)); 1527 PetscCall(VecRestoreArrayWrite(xx, &x)); 1528 1529 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1530 PetscFunctionReturn(PETSC_SUCCESS); 1531 } 1532 1533 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1534 { 1535 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1536 IS iscol = a->col, isrow = a->row; 1537 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1538 PetscInt i, n = A->rmap->n, j; 1539 PetscInt nz; 1540 PetscScalar *x, *tmp, s1; 1541 const MatScalar *aa = a->a, *v; 1542 const PetscScalar *b; 1543 1544 PetscFunctionBegin; 1545 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1546 PetscCall(VecGetArrayRead(bb, &b)); 1547 PetscCall(VecGetArray(xx, &x)); 1548 tmp = a->solve_work; 1549 1550 PetscCall(ISGetIndices(isrow, &rout)); 1551 r = rout; 1552 PetscCall(ISGetIndices(iscol, &cout)); 1553 c = cout; 1554 1555 /* copy the b into temp work space according to permutation */ 1556 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1557 1558 /* forward solve the U^T */ 1559 for (i = 0; i < n; i++) { 1560 v = aa + diag[i]; 1561 vi = aj + diag[i] + 1; 1562 nz = ai[i + 1] - diag[i] - 1; 1563 s1 = tmp[i]; 1564 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1565 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1566 tmp[i] = s1; 1567 } 1568 1569 /* backward solve the L^T */ 1570 for (i = n - 1; i >= 0; i--) { 1571 v = aa + diag[i] - 1; 1572 vi = aj + diag[i] - 1; 1573 nz = diag[i] - ai[i]; 1574 s1 = tmp[i]; 1575 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1576 } 1577 1578 /* copy tmp into x according to permutation */ 1579 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1580 1581 PetscCall(ISRestoreIndices(isrow, &rout)); 1582 PetscCall(ISRestoreIndices(iscol, &cout)); 1583 PetscCall(VecRestoreArrayRead(bb, &b)); 1584 PetscCall(VecRestoreArray(xx, &x)); 1585 1586 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1587 PetscFunctionReturn(PETSC_SUCCESS); 1588 } 1589 1590 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1591 { 1592 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1593 IS iscol = a->col, isrow = a->row; 1594 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1595 PetscInt i, n = A->rmap->n, j; 1596 PetscInt nz; 1597 PetscScalar *x, *tmp, s1; 1598 const MatScalar *aa = a->a, *v; 1599 const PetscScalar *b; 1600 1601 PetscFunctionBegin; 1602 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1603 PetscCall(VecGetArrayRead(bb, &b)); 1604 PetscCall(VecGetArray(xx, &x)); 1605 tmp = a->solve_work; 1606 1607 PetscCall(ISGetIndices(isrow, &rout)); 1608 r = rout; 1609 PetscCall(ISGetIndices(iscol, &cout)); 1610 c = cout; 1611 1612 /* copy the b into temp work space according to permutation */ 1613 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1614 1615 /* forward solve the U^T */ 1616 for (i = 0; i < n; i++) { 1617 v = aa + adiag[i + 1] + 1; 1618 vi = aj + adiag[i + 1] + 1; 1619 nz = adiag[i] - adiag[i + 1] - 1; 1620 s1 = tmp[i]; 1621 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1622 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1623 tmp[i] = s1; 1624 } 1625 1626 /* backward solve the L^T */ 1627 for (i = n - 1; i >= 0; i--) { 1628 v = aa + ai[i]; 1629 vi = aj + ai[i]; 1630 nz = ai[i + 1] - ai[i]; 1631 s1 = tmp[i]; 1632 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1633 } 1634 1635 /* copy tmp into x according to permutation */ 1636 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1637 1638 PetscCall(ISRestoreIndices(isrow, &rout)); 1639 PetscCall(ISRestoreIndices(iscol, &cout)); 1640 PetscCall(VecRestoreArrayRead(bb, &b)); 1641 PetscCall(VecRestoreArray(xx, &x)); 1642 1643 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1644 PetscFunctionReturn(PETSC_SUCCESS); 1645 } 1646 1647 /* 1648 ilu() under revised new data structure. 1649 Factored arrays bj and ba are stored as 1650 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1651 1652 bi=fact->i is an array of size n+1, in which 1653 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1654 bi[n]: points to L(n-1,n-1)+1 1655 1656 bdiag=fact->diag is an array of size n+1,in which 1657 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1658 bdiag[n]: points to entry of U(n-1,0)-1 1659 1660 U(i,:) contains bdiag[i] as its last entry, i.e., 1661 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1662 */ 1663 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1664 { 1665 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1666 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1667 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1668 IS isicol; 1669 1670 PetscFunctionBegin; 1671 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1672 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1673 b = (Mat_SeqAIJ *)(fact)->data; 1674 1675 /* allocate matrix arrays for new data structure */ 1676 PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i)); 1677 1678 b->singlemalloc = PETSC_TRUE; 1679 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1680 bdiag = b->diag; 1681 1682 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1683 1684 /* set bi and bj with new data structure */ 1685 bi = b->i; 1686 bj = b->j; 1687 1688 /* L part */ 1689 bi[0] = 0; 1690 for (i = 0; i < n; i++) { 1691 nz = adiag[i] - ai[i]; 1692 bi[i + 1] = bi[i] + nz; 1693 aj = a->j + ai[i]; 1694 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1695 } 1696 1697 /* U part */ 1698 bdiag[n] = bi[n] - 1; 1699 for (i = n - 1; i >= 0; i--) { 1700 nz = ai[i + 1] - adiag[i] - 1; 1701 aj = a->j + adiag[i] + 1; 1702 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1703 /* diag[i] */ 1704 bj[k++] = i; 1705 bdiag[i] = bdiag[i + 1] + nz + 1; 1706 } 1707 1708 fact->factortype = MAT_FACTOR_ILU; 1709 fact->info.factor_mallocs = 0; 1710 fact->info.fill_ratio_given = info->fill; 1711 fact->info.fill_ratio_needed = 1.0; 1712 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1713 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1714 1715 b = (Mat_SeqAIJ *)(fact)->data; 1716 b->row = isrow; 1717 b->col = iscol; 1718 b->icol = isicol; 1719 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1720 PetscCall(PetscObjectReference((PetscObject)isrow)); 1721 PetscCall(PetscObjectReference((PetscObject)iscol)); 1722 PetscFunctionReturn(PETSC_SUCCESS); 1723 } 1724 1725 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1726 { 1727 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1728 IS isicol; 1729 const PetscInt *r, *ic; 1730 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1731 PetscInt *bi, *cols, nnz, *cols_lvl; 1732 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1733 PetscInt i, levels, diagonal_fill; 1734 PetscBool col_identity, row_identity, missing; 1735 PetscReal f; 1736 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1737 PetscBT lnkbt; 1738 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1739 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1740 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1741 1742 PetscFunctionBegin; 1743 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1744 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1745 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1746 1747 levels = (PetscInt)info->levels; 1748 PetscCall(ISIdentity(isrow, &row_identity)); 1749 PetscCall(ISIdentity(iscol, &col_identity)); 1750 if (!levels && row_identity && col_identity) { 1751 /* special case: ilu(0) with natural ordering */ 1752 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1753 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1754 PetscFunctionReturn(PETSC_SUCCESS); 1755 } 1756 1757 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1758 PetscCall(ISGetIndices(isrow, &r)); 1759 PetscCall(ISGetIndices(isicol, &ic)); 1760 1761 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1762 PetscCall(PetscMalloc1(n + 1, &bi)); 1763 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1764 bi[0] = bdiag[0] = 0; 1765 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1766 1767 /* create a linked list for storing column indices of the active row */ 1768 nlnk = n + 1; 1769 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1770 1771 /* initial FreeSpace size is f*(ai[n]+1) */ 1772 f = info->fill; 1773 diagonal_fill = (PetscInt)info->diagonal_fill; 1774 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1775 current_space = free_space; 1776 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1777 current_space_lvl = free_space_lvl; 1778 for (i = 0; i < n; i++) { 1779 nzi = 0; 1780 /* copy current row into linked list */ 1781 nnz = ai[r[i] + 1] - ai[r[i]]; 1782 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1783 cols = aj + ai[r[i]]; 1784 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1785 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1786 nzi += nlnk; 1787 1788 /* make sure diagonal entry is included */ 1789 if (diagonal_fill && lnk[i] == -1) { 1790 fm = n; 1791 while (lnk[fm] < i) fm = lnk[fm]; 1792 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1793 lnk[fm] = i; 1794 lnk_lvl[i] = 0; 1795 nzi++; 1796 dcount++; 1797 } 1798 1799 /* add pivot rows into the active row */ 1800 nzbd = 0; 1801 prow = lnk[n]; 1802 while (prow < i) { 1803 nnz = bdiag[prow]; 1804 cols = bj_ptr[prow] + nnz + 1; 1805 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1806 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1807 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1808 nzi += nlnk; 1809 prow = lnk[prow]; 1810 nzbd++; 1811 } 1812 bdiag[i] = nzbd; 1813 bi[i + 1] = bi[i] + nzi; 1814 /* if free space is not available, make more free space */ 1815 if (current_space->local_remaining < nzi) { 1816 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1817 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1818 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1819 reallocs++; 1820 } 1821 1822 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1823 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1824 bj_ptr[i] = current_space->array; 1825 bjlvl_ptr[i] = current_space_lvl->array; 1826 1827 /* make sure the active row i has diagonal entry */ 1828 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1829 1830 current_space->array += nzi; 1831 current_space->local_used += nzi; 1832 current_space->local_remaining -= nzi; 1833 current_space_lvl->array += nzi; 1834 current_space_lvl->local_used += nzi; 1835 current_space_lvl->local_remaining -= nzi; 1836 } 1837 1838 PetscCall(ISRestoreIndices(isrow, &r)); 1839 PetscCall(ISRestoreIndices(isicol, &ic)); 1840 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1841 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 1842 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1843 1844 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1845 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1846 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1847 1848 #if defined(PETSC_USE_INFO) 1849 { 1850 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1851 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1852 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1853 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1854 PetscCall(PetscInfo(A, "for best performance.\n")); 1855 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1856 } 1857 #endif 1858 /* put together the new matrix */ 1859 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1860 b = (Mat_SeqAIJ *)(fact)->data; 1861 1862 b->free_a = PETSC_TRUE; 1863 b->free_ij = PETSC_TRUE; 1864 b->singlemalloc = PETSC_FALSE; 1865 1866 PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a)); 1867 1868 b->j = bj; 1869 b->i = bi; 1870 b->diag = bdiag; 1871 b->ilen = NULL; 1872 b->imax = NULL; 1873 b->row = isrow; 1874 b->col = iscol; 1875 PetscCall(PetscObjectReference((PetscObject)isrow)); 1876 PetscCall(PetscObjectReference((PetscObject)iscol)); 1877 b->icol = isicol; 1878 1879 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1880 /* In b structure: Free imax, ilen, old a, old j. 1881 Allocate bdiag, solve_work, new a, new j */ 1882 b->maxnz = b->nz = bdiag[0] + 1; 1883 1884 (fact)->info.factor_mallocs = reallocs; 1885 (fact)->info.fill_ratio_given = f; 1886 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1887 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1888 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1889 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1890 PetscFunctionReturn(PETSC_SUCCESS); 1891 } 1892 1893 #if 0 1894 // unused 1895 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1896 { 1897 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1898 IS isicol; 1899 const PetscInt *r, *ic; 1900 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1901 PetscInt *bi, *cols, nnz, *cols_lvl; 1902 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1903 PetscInt i, levels, diagonal_fill; 1904 PetscBool col_identity, row_identity; 1905 PetscReal f; 1906 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1907 PetscBT lnkbt; 1908 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1909 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1910 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1911 PetscBool missing; 1912 1913 PetscFunctionBegin; 1914 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1915 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1916 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1917 1918 f = info->fill; 1919 levels = (PetscInt)info->levels; 1920 diagonal_fill = (PetscInt)info->diagonal_fill; 1921 1922 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1923 1924 PetscCall(ISIdentity(isrow, &row_identity)); 1925 PetscCall(ISIdentity(iscol, &col_identity)); 1926 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1927 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1928 1929 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1930 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1931 fact->factortype = MAT_FACTOR_ILU; 1932 (fact)->info.factor_mallocs = 0; 1933 (fact)->info.fill_ratio_given = info->fill; 1934 (fact)->info.fill_ratio_needed = 1.0; 1935 1936 b = (Mat_SeqAIJ *)(fact)->data; 1937 b->row = isrow; 1938 b->col = iscol; 1939 b->icol = isicol; 1940 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1941 PetscCall(PetscObjectReference((PetscObject)isrow)); 1942 PetscCall(PetscObjectReference((PetscObject)iscol)); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 PetscCall(ISGetIndices(isrow, &r)); 1947 PetscCall(ISGetIndices(isicol, &ic)); 1948 1949 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1950 PetscCall(PetscMalloc1(n + 1, &bi)); 1951 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1952 bi[0] = bdiag[0] = 0; 1953 1954 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1955 1956 /* create a linked list for storing column indices of the active row */ 1957 nlnk = n + 1; 1958 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1959 1960 /* initial FreeSpace size is f*(ai[n]+1) */ 1961 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1962 current_space = free_space; 1963 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1964 current_space_lvl = free_space_lvl; 1965 1966 for (i = 0; i < n; i++) { 1967 nzi = 0; 1968 /* copy current row into linked list */ 1969 nnz = ai[r[i] + 1] - ai[r[i]]; 1970 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1971 cols = aj + ai[r[i]]; 1972 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1973 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1974 nzi += nlnk; 1975 1976 /* make sure diagonal entry is included */ 1977 if (diagonal_fill && lnk[i] == -1) { 1978 fm = n; 1979 while (lnk[fm] < i) fm = lnk[fm]; 1980 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1981 lnk[fm] = i; 1982 lnk_lvl[i] = 0; 1983 nzi++; 1984 dcount++; 1985 } 1986 1987 /* add pivot rows into the active row */ 1988 nzbd = 0; 1989 prow = lnk[n]; 1990 while (prow < i) { 1991 nnz = bdiag[prow]; 1992 cols = bj_ptr[prow] + nnz + 1; 1993 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1994 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1995 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1996 nzi += nlnk; 1997 prow = lnk[prow]; 1998 nzbd++; 1999 } 2000 bdiag[i] = nzbd; 2001 bi[i + 1] = bi[i] + nzi; 2002 2003 /* if free space is not available, make more free space */ 2004 if (current_space->local_remaining < nzi) { 2005 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 2006 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 2007 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 2008 reallocs++; 2009 } 2010 2011 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2012 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2013 bj_ptr[i] = current_space->array; 2014 bjlvl_ptr[i] = current_space_lvl->array; 2015 2016 /* make sure the active row i has diagonal entry */ 2017 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 2018 2019 current_space->array += nzi; 2020 current_space->local_used += nzi; 2021 current_space->local_remaining -= nzi; 2022 current_space_lvl->array += nzi; 2023 current_space_lvl->local_used += nzi; 2024 current_space_lvl->local_remaining -= nzi; 2025 } 2026 2027 PetscCall(ISRestoreIndices(isrow, &r)); 2028 PetscCall(ISRestoreIndices(isicol, &ic)); 2029 2030 /* destroy list of free space and other temporary arrays */ 2031 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 2032 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 2033 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2034 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2035 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 2036 2037 #if defined(PETSC_USE_INFO) 2038 { 2039 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2040 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2041 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 2042 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 2043 PetscCall(PetscInfo(A, "for best performance.\n")); 2044 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 2045 } 2046 #endif 2047 2048 /* put together the new matrix */ 2049 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 2050 b = (Mat_SeqAIJ *)(fact)->data; 2051 2052 b->free_a = PETSC_TRUE; 2053 b->free_ij = PETSC_TRUE; 2054 b->singlemalloc = PETSC_FALSE; 2055 2056 PetscCall(PetscMalloc1(bi[n], &b->a)); 2057 b->j = bj; 2058 b->i = bi; 2059 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 2060 b->diag = bdiag; 2061 b->ilen = NULL; 2062 b->imax = NULL; 2063 b->row = isrow; 2064 b->col = iscol; 2065 PetscCall(PetscObjectReference((PetscObject)isrow)); 2066 PetscCall(PetscObjectReference((PetscObject)iscol)); 2067 b->icol = isicol; 2068 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2069 /* In b structure: Free imax, ilen, old a, old j. 2070 Allocate bdiag, solve_work, new a, new j */ 2071 b->maxnz = b->nz = bi[n]; 2072 2073 (fact)->info.factor_mallocs = reallocs; 2074 (fact)->info.fill_ratio_given = f; 2075 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2076 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2077 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 #endif 2081 2082 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2083 { 2084 Mat C = B; 2085 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2086 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2087 IS ip = b->row, iip = b->icol; 2088 const PetscInt *rip, *riip; 2089 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2090 PetscInt *ai = a->i, *aj = a->j; 2091 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2092 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2093 PetscBool perm_identity; 2094 FactorShiftCtx sctx; 2095 PetscReal rs; 2096 MatScalar d, *v; 2097 2098 PetscFunctionBegin; 2099 /* MatPivotSetUp(): initialize shift context sctx */ 2100 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2101 2102 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2103 sctx.shift_top = info->zeropivot; 2104 for (i = 0; i < mbs; i++) { 2105 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2106 d = (aa)[a->diag[i]]; 2107 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2108 v = aa + ai[i]; 2109 nz = ai[i + 1] - ai[i]; 2110 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2111 if (rs > sctx.shift_top) sctx.shift_top = rs; 2112 } 2113 sctx.shift_top *= 1.1; 2114 sctx.nshift_max = 5; 2115 sctx.shift_lo = 0.; 2116 sctx.shift_hi = 1.; 2117 } 2118 2119 PetscCall(ISGetIndices(ip, &rip)); 2120 PetscCall(ISGetIndices(iip, &riip)); 2121 2122 /* allocate working arrays 2123 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2124 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2125 */ 2126 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2127 2128 do { 2129 sctx.newshift = PETSC_FALSE; 2130 2131 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2132 if (mbs) il[0] = 0; 2133 2134 for (k = 0; k < mbs; k++) { 2135 /* zero rtmp */ 2136 nz = bi[k + 1] - bi[k]; 2137 bjtmp = bj + bi[k]; 2138 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2139 2140 /* load in initial unfactored row */ 2141 bval = ba + bi[k]; 2142 jmin = ai[rip[k]]; 2143 jmax = ai[rip[k] + 1]; 2144 for (j = jmin; j < jmax; j++) { 2145 col = riip[aj[j]]; 2146 if (col >= k) { /* only take upper triangular entry */ 2147 rtmp[col] = aa[j]; 2148 *bval++ = 0.0; /* for in-place factorization */ 2149 } 2150 } 2151 /* shift the diagonal of the matrix: ZeropivotApply() */ 2152 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2153 2154 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2155 dk = rtmp[k]; 2156 i = c2r[k]; /* first row to be added to k_th row */ 2157 2158 while (i < k) { 2159 nexti = c2r[i]; /* next row to be added to k_th row */ 2160 2161 /* compute multiplier, update diag(k) and U(i,k) */ 2162 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2163 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2164 dk += uikdi * ba[ili]; /* update diag[k] */ 2165 ba[ili] = uikdi; /* -U(i,k) */ 2166 2167 /* add multiple of row i to k-th row */ 2168 jmin = ili + 1; 2169 jmax = bi[i + 1]; 2170 if (jmin < jmax) { 2171 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2172 /* update il and c2r for row i */ 2173 il[i] = jmin; 2174 j = bj[jmin]; 2175 c2r[i] = c2r[j]; 2176 c2r[j] = i; 2177 } 2178 i = nexti; 2179 } 2180 2181 /* copy data into U(k,:) */ 2182 rs = 0.0; 2183 jmin = bi[k]; 2184 jmax = bi[k + 1] - 1; 2185 if (jmin < jmax) { 2186 for (j = jmin; j < jmax; j++) { 2187 col = bj[j]; 2188 ba[j] = rtmp[col]; 2189 rs += PetscAbsScalar(ba[j]); 2190 } 2191 /* add the k-th row into il and c2r */ 2192 il[k] = jmin; 2193 i = bj[jmin]; 2194 c2r[k] = c2r[i]; 2195 c2r[i] = k; 2196 } 2197 2198 /* MatPivotCheck() */ 2199 sctx.rs = rs; 2200 sctx.pv = dk; 2201 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2202 if (sctx.newshift) break; 2203 dk = sctx.pv; 2204 2205 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2206 } 2207 } while (sctx.newshift); 2208 2209 PetscCall(PetscFree3(rtmp, il, c2r)); 2210 PetscCall(ISRestoreIndices(ip, &rip)); 2211 PetscCall(ISRestoreIndices(iip, &riip)); 2212 2213 PetscCall(ISIdentity(ip, &perm_identity)); 2214 if (perm_identity) { 2215 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2216 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2217 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2218 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2219 } else { 2220 B->ops->solve = MatSolve_SeqSBAIJ_1; 2221 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2222 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2223 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2224 } 2225 2226 C->assembled = PETSC_TRUE; 2227 C->preallocated = PETSC_TRUE; 2228 2229 PetscCall(PetscLogFlops(C->rmap->n)); 2230 2231 /* MatPivotView() */ 2232 if (sctx.nshift) { 2233 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2234 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2235 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2236 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2237 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2238 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2239 } 2240 } 2241 PetscFunctionReturn(PETSC_SUCCESS); 2242 } 2243 2244 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2245 { 2246 Mat C = B; 2247 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2248 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2249 IS ip = b->row, iip = b->icol; 2250 const PetscInt *rip, *riip; 2251 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2252 PetscInt *ai = a->i, *aj = a->j; 2253 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2254 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2255 PetscBool perm_identity; 2256 FactorShiftCtx sctx; 2257 PetscReal rs; 2258 MatScalar d, *v; 2259 2260 PetscFunctionBegin; 2261 /* MatPivotSetUp(): initialize shift context sctx */ 2262 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2263 2264 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2265 sctx.shift_top = info->zeropivot; 2266 for (i = 0; i < mbs; i++) { 2267 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2268 d = (aa)[a->diag[i]]; 2269 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2270 v = aa + ai[i]; 2271 nz = ai[i + 1] - ai[i]; 2272 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2273 if (rs > sctx.shift_top) sctx.shift_top = rs; 2274 } 2275 sctx.shift_top *= 1.1; 2276 sctx.nshift_max = 5; 2277 sctx.shift_lo = 0.; 2278 sctx.shift_hi = 1.; 2279 } 2280 2281 PetscCall(ISGetIndices(ip, &rip)); 2282 PetscCall(ISGetIndices(iip, &riip)); 2283 2284 /* initialization */ 2285 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2286 2287 do { 2288 sctx.newshift = PETSC_FALSE; 2289 2290 for (i = 0; i < mbs; i++) jl[i] = mbs; 2291 il[0] = 0; 2292 2293 for (k = 0; k < mbs; k++) { 2294 /* zero rtmp */ 2295 nz = bi[k + 1] - bi[k]; 2296 bjtmp = bj + bi[k]; 2297 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2298 2299 bval = ba + bi[k]; 2300 /* initialize k-th row by the perm[k]-th row of A */ 2301 jmin = ai[rip[k]]; 2302 jmax = ai[rip[k] + 1]; 2303 for (j = jmin; j < jmax; j++) { 2304 col = riip[aj[j]]; 2305 if (col >= k) { /* only take upper triangular entry */ 2306 rtmp[col] = aa[j]; 2307 *bval++ = 0.0; /* for in-place factorization */ 2308 } 2309 } 2310 /* shift the diagonal of the matrix */ 2311 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2312 2313 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2314 dk = rtmp[k]; 2315 i = jl[k]; /* first row to be added to k_th row */ 2316 2317 while (i < k) { 2318 nexti = jl[i]; /* next row to be added to k_th row */ 2319 2320 /* compute multiplier, update diag(k) and U(i,k) */ 2321 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2322 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2323 dk += uikdi * ba[ili]; 2324 ba[ili] = uikdi; /* -U(i,k) */ 2325 2326 /* add multiple of row i to k-th row */ 2327 jmin = ili + 1; 2328 jmax = bi[i + 1]; 2329 if (jmin < jmax) { 2330 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2331 /* update il and jl for row i */ 2332 il[i] = jmin; 2333 j = bj[jmin]; 2334 jl[i] = jl[j]; 2335 jl[j] = i; 2336 } 2337 i = nexti; 2338 } 2339 2340 /* shift the diagonals when zero pivot is detected */ 2341 /* compute rs=sum of abs(off-diagonal) */ 2342 rs = 0.0; 2343 jmin = bi[k] + 1; 2344 nz = bi[k + 1] - jmin; 2345 bcol = bj + jmin; 2346 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2347 2348 sctx.rs = rs; 2349 sctx.pv = dk; 2350 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2351 if (sctx.newshift) break; 2352 dk = sctx.pv; 2353 2354 /* copy data into U(k,:) */ 2355 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2356 jmin = bi[k] + 1; 2357 jmax = bi[k + 1]; 2358 if (jmin < jmax) { 2359 for (j = jmin; j < jmax; j++) { 2360 col = bj[j]; 2361 ba[j] = rtmp[col]; 2362 } 2363 /* add the k-th row into il and jl */ 2364 il[k] = jmin; 2365 i = bj[jmin]; 2366 jl[k] = jl[i]; 2367 jl[i] = k; 2368 } 2369 } 2370 } while (sctx.newshift); 2371 2372 PetscCall(PetscFree3(rtmp, il, jl)); 2373 PetscCall(ISRestoreIndices(ip, &rip)); 2374 PetscCall(ISRestoreIndices(iip, &riip)); 2375 2376 PetscCall(ISIdentity(ip, &perm_identity)); 2377 if (perm_identity) { 2378 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2379 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2380 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2381 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2382 } else { 2383 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2384 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2385 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2386 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2387 } 2388 2389 C->assembled = PETSC_TRUE; 2390 C->preallocated = PETSC_TRUE; 2391 2392 PetscCall(PetscLogFlops(C->rmap->n)); 2393 if (sctx.nshift) { 2394 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2395 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2396 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2397 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2398 } 2399 } 2400 PetscFunctionReturn(PETSC_SUCCESS); 2401 } 2402 2403 /* 2404 icc() under revised new data structure. 2405 Factored arrays bj and ba are stored as 2406 U(0,:),...,U(i,:),U(n-1,:) 2407 2408 ui=fact->i is an array of size n+1, in which 2409 ui+ 2410 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2411 ui[n]: points to U(n-1,n-1)+1 2412 2413 udiag=fact->diag is an array of size n,in which 2414 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2415 2416 U(i,:) contains udiag[i] as its last entry, i.e., 2417 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2418 */ 2419 2420 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2421 { 2422 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2423 Mat_SeqSBAIJ *b; 2424 PetscBool perm_identity, missing; 2425 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2426 const PetscInt *rip, *riip; 2427 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2428 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2429 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2430 PetscReal fill = info->fill, levels = info->levels; 2431 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2432 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2433 PetscBT lnkbt; 2434 IS iperm; 2435 2436 PetscFunctionBegin; 2437 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2438 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2439 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2440 PetscCall(ISIdentity(perm, &perm_identity)); 2441 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2442 2443 PetscCall(PetscMalloc1(am + 1, &ui)); 2444 PetscCall(PetscMalloc1(am + 1, &udiag)); 2445 ui[0] = 0; 2446 2447 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2448 if (!levels && perm_identity) { 2449 for (i = 0; i < am; i++) { 2450 ncols = ai[i + 1] - a->diag[i]; 2451 ui[i + 1] = ui[i] + ncols; 2452 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2453 } 2454 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2455 cols = uj; 2456 for (i = 0; i < am; i++) { 2457 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2458 ncols = ai[i + 1] - a->diag[i] - 1; 2459 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2460 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2461 } 2462 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2463 PetscCall(ISGetIndices(iperm, &riip)); 2464 PetscCall(ISGetIndices(perm, &rip)); 2465 2466 /* initialization */ 2467 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2468 2469 /* jl: linked list for storing indices of the pivot rows 2470 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2471 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2472 for (i = 0; i < am; i++) { 2473 jl[i] = am; 2474 il[i] = 0; 2475 } 2476 2477 /* create and initialize a linked list for storing column indices of the active row k */ 2478 nlnk = am + 1; 2479 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2480 2481 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2482 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2483 current_space = free_space; 2484 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2485 current_space_lvl = free_space_lvl; 2486 2487 for (k = 0; k < am; k++) { /* for each active row k */ 2488 /* initialize lnk by the column indices of row rip[k] of A */ 2489 nzk = 0; 2490 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2491 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2492 ncols_upper = 0; 2493 for (j = 0; j < ncols; j++) { 2494 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2495 if (riip[i] >= k) { /* only take upper triangular entry */ 2496 ajtmp[ncols_upper] = i; 2497 ncols_upper++; 2498 } 2499 } 2500 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2501 nzk += nlnk; 2502 2503 /* update lnk by computing fill-in for each pivot row to be merged in */ 2504 prow = jl[k]; /* 1st pivot row */ 2505 2506 while (prow < k) { 2507 nextprow = jl[prow]; 2508 2509 /* merge prow into k-th row */ 2510 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2511 jmax = ui[prow + 1]; 2512 ncols = jmax - jmin; 2513 i = jmin - ui[prow]; 2514 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2515 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2516 j = *(uj - 1); 2517 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2518 nzk += nlnk; 2519 2520 /* update il and jl for prow */ 2521 if (jmin < jmax) { 2522 il[prow] = jmin; 2523 j = *cols; 2524 jl[prow] = jl[j]; 2525 jl[j] = prow; 2526 } 2527 prow = nextprow; 2528 } 2529 2530 /* if free space is not available, make more free space */ 2531 if (current_space->local_remaining < nzk) { 2532 i = am - k + 1; /* num of unfactored rows */ 2533 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2534 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2535 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2536 reallocs++; 2537 } 2538 2539 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2540 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2541 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2542 2543 /* add the k-th row into il and jl */ 2544 if (nzk > 1) { 2545 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2546 jl[k] = jl[i]; 2547 jl[i] = k; 2548 il[k] = ui[k] + 1; 2549 } 2550 uj_ptr[k] = current_space->array; 2551 uj_lvl_ptr[k] = current_space_lvl->array; 2552 2553 current_space->array += nzk; 2554 current_space->local_used += nzk; 2555 current_space->local_remaining -= nzk; 2556 2557 current_space_lvl->array += nzk; 2558 current_space_lvl->local_used += nzk; 2559 current_space_lvl->local_remaining -= nzk; 2560 2561 ui[k + 1] = ui[k] + nzk; 2562 } 2563 2564 PetscCall(ISRestoreIndices(perm, &rip)); 2565 PetscCall(ISRestoreIndices(iperm, &riip)); 2566 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2567 PetscCall(PetscFree(ajtmp)); 2568 2569 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2570 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2571 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2572 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2573 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2574 2575 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2576 2577 /* put together the new matrix in MATSEQSBAIJ format */ 2578 b = (Mat_SeqSBAIJ *)(fact)->data; 2579 b->singlemalloc = PETSC_FALSE; 2580 2581 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2582 2583 b->j = uj; 2584 b->i = ui; 2585 b->diag = udiag; 2586 b->free_diag = PETSC_TRUE; 2587 b->ilen = NULL; 2588 b->imax = NULL; 2589 b->row = perm; 2590 b->col = perm; 2591 PetscCall(PetscObjectReference((PetscObject)perm)); 2592 PetscCall(PetscObjectReference((PetscObject)perm)); 2593 b->icol = iperm; 2594 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2595 2596 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2597 2598 b->maxnz = b->nz = ui[am]; 2599 b->free_a = PETSC_TRUE; 2600 b->free_ij = PETSC_TRUE; 2601 2602 fact->info.factor_mallocs = reallocs; 2603 fact->info.fill_ratio_given = fill; 2604 if (ai[am] != 0) { 2605 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2606 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2607 } else { 2608 fact->info.fill_ratio_needed = 0.0; 2609 } 2610 #if defined(PETSC_USE_INFO) 2611 if (ai[am] != 0) { 2612 PetscReal af = fact->info.fill_ratio_needed; 2613 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2614 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2615 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2616 } else { 2617 PetscCall(PetscInfo(A, "Empty matrix\n")); 2618 } 2619 #endif 2620 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2621 PetscFunctionReturn(PETSC_SUCCESS); 2622 } 2623 2624 #if 0 2625 // unused 2626 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2627 { 2628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2629 Mat_SeqSBAIJ *b; 2630 PetscBool perm_identity, missing; 2631 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2632 const PetscInt *rip, *riip; 2633 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2634 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2635 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2636 PetscReal fill = info->fill, levels = info->levels; 2637 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2638 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2639 PetscBT lnkbt; 2640 IS iperm; 2641 2642 PetscFunctionBegin; 2643 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2644 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2645 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2646 PetscCall(ISIdentity(perm, &perm_identity)); 2647 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2648 2649 PetscCall(PetscMalloc1(am + 1, &ui)); 2650 PetscCall(PetscMalloc1(am + 1, &udiag)); 2651 ui[0] = 0; 2652 2653 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2654 if (!levels && perm_identity) { 2655 for (i = 0; i < am; i++) { 2656 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2657 udiag[i] = ui[i]; 2658 } 2659 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2660 cols = uj; 2661 for (i = 0; i < am; i++) { 2662 aj = a->j + a->diag[i]; 2663 ncols = ui[i + 1] - ui[i]; 2664 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2665 } 2666 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2667 PetscCall(ISGetIndices(iperm, &riip)); 2668 PetscCall(ISGetIndices(perm, &rip)); 2669 2670 /* initialization */ 2671 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2672 2673 /* jl: linked list for storing indices of the pivot rows 2674 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2675 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2676 for (i = 0; i < am; i++) { 2677 jl[i] = am; 2678 il[i] = 0; 2679 } 2680 2681 /* create and initialize a linked list for storing column indices of the active row k */ 2682 nlnk = am + 1; 2683 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2684 2685 /* initial FreeSpace size is fill*(ai[am]+1) */ 2686 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2687 current_space = free_space; 2688 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2689 current_space_lvl = free_space_lvl; 2690 2691 for (k = 0; k < am; k++) { /* for each active row k */ 2692 /* initialize lnk by the column indices of row rip[k] of A */ 2693 nzk = 0; 2694 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2695 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2696 ncols_upper = 0; 2697 for (j = 0; j < ncols; j++) { 2698 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2699 if (riip[i] >= k) { /* only take upper triangular entry */ 2700 ajtmp[ncols_upper] = i; 2701 ncols_upper++; 2702 } 2703 } 2704 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2705 nzk += nlnk; 2706 2707 /* update lnk by computing fill-in for each pivot row to be merged in */ 2708 prow = jl[k]; /* 1st pivot row */ 2709 2710 while (prow < k) { 2711 nextprow = jl[prow]; 2712 2713 /* merge prow into k-th row */ 2714 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2715 jmax = ui[prow + 1]; 2716 ncols = jmax - jmin; 2717 i = jmin - ui[prow]; 2718 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2719 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2720 j = *(uj - 1); 2721 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2722 nzk += nlnk; 2723 2724 /* update il and jl for prow */ 2725 if (jmin < jmax) { 2726 il[prow] = jmin; 2727 j = *cols; 2728 jl[prow] = jl[j]; 2729 jl[j] = prow; 2730 } 2731 prow = nextprow; 2732 } 2733 2734 /* if free space is not available, make more free space */ 2735 if (current_space->local_remaining < nzk) { 2736 i = am - k + 1; /* num of unfactored rows */ 2737 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2738 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2739 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2740 reallocs++; 2741 } 2742 2743 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2744 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2745 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2746 2747 /* add the k-th row into il and jl */ 2748 if (nzk > 1) { 2749 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2750 jl[k] = jl[i]; 2751 jl[i] = k; 2752 il[k] = ui[k] + 1; 2753 } 2754 uj_ptr[k] = current_space->array; 2755 uj_lvl_ptr[k] = current_space_lvl->array; 2756 2757 current_space->array += nzk; 2758 current_space->local_used += nzk; 2759 current_space->local_remaining -= nzk; 2760 2761 current_space_lvl->array += nzk; 2762 current_space_lvl->local_used += nzk; 2763 current_space_lvl->local_remaining -= nzk; 2764 2765 ui[k + 1] = ui[k] + nzk; 2766 } 2767 2768 #if defined(PETSC_USE_INFO) 2769 if (ai[am] != 0) { 2770 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2771 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2772 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2773 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2774 } else { 2775 PetscCall(PetscInfo(A, "Empty matrix\n")); 2776 } 2777 #endif 2778 2779 PetscCall(ISRestoreIndices(perm, &rip)); 2780 PetscCall(ISRestoreIndices(iperm, &riip)); 2781 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2782 PetscCall(PetscFree(ajtmp)); 2783 2784 /* destroy list of free space and other temporary array(s) */ 2785 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2786 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2787 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2788 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2789 2790 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2791 2792 /* put together the new matrix in MATSEQSBAIJ format */ 2793 2794 b = (Mat_SeqSBAIJ *)fact->data; 2795 b->singlemalloc = PETSC_FALSE; 2796 2797 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2798 2799 b->j = uj; 2800 b->i = ui; 2801 b->diag = udiag; 2802 b->free_diag = PETSC_TRUE; 2803 b->ilen = NULL; 2804 b->imax = NULL; 2805 b->row = perm; 2806 b->col = perm; 2807 2808 PetscCall(PetscObjectReference((PetscObject)perm)); 2809 PetscCall(PetscObjectReference((PetscObject)perm)); 2810 2811 b->icol = iperm; 2812 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2813 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2814 b->maxnz = b->nz = ui[am]; 2815 b->free_a = PETSC_TRUE; 2816 b->free_ij = PETSC_TRUE; 2817 2818 fact->info.factor_mallocs = reallocs; 2819 fact->info.fill_ratio_given = fill; 2820 if (ai[am] != 0) { 2821 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2822 } else { 2823 fact->info.fill_ratio_needed = 0.0; 2824 } 2825 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2826 PetscFunctionReturn(PETSC_SUCCESS); 2827 } 2828 #endif 2829 2830 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2831 { 2832 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2833 Mat_SeqSBAIJ *b; 2834 PetscBool perm_identity, missing; 2835 PetscReal fill = info->fill; 2836 const PetscInt *rip, *riip; 2837 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2838 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2839 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2840 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2841 PetscBT lnkbt; 2842 IS iperm; 2843 2844 PetscFunctionBegin; 2845 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2846 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2847 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2848 2849 /* check whether perm is the identity mapping */ 2850 PetscCall(ISIdentity(perm, &perm_identity)); 2851 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2852 PetscCall(ISGetIndices(iperm, &riip)); 2853 PetscCall(ISGetIndices(perm, &rip)); 2854 2855 /* initialization */ 2856 PetscCall(PetscMalloc1(am + 1, &ui)); 2857 PetscCall(PetscMalloc1(am + 1, &udiag)); 2858 ui[0] = 0; 2859 2860 /* jl: linked list for storing indices of the pivot rows 2861 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2862 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2863 for (i = 0; i < am; i++) { 2864 jl[i] = am; 2865 il[i] = 0; 2866 } 2867 2868 /* create and initialize a linked list for storing column indices of the active row k */ 2869 nlnk = am + 1; 2870 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2871 2872 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2873 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2874 current_space = free_space; 2875 2876 for (k = 0; k < am; k++) { /* for each active row k */ 2877 /* initialize lnk by the column indices of row rip[k] of A */ 2878 nzk = 0; 2879 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2880 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2881 ncols_upper = 0; 2882 for (j = 0; j < ncols; j++) { 2883 i = riip[*(aj + ai[rip[k]] + j)]; 2884 if (i >= k) { /* only take upper triangular entry */ 2885 cols[ncols_upper] = i; 2886 ncols_upper++; 2887 } 2888 } 2889 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2890 nzk += nlnk; 2891 2892 /* update lnk by computing fill-in for each pivot row to be merged in */ 2893 prow = jl[k]; /* 1st pivot row */ 2894 2895 while (prow < k) { 2896 nextprow = jl[prow]; 2897 /* merge prow into k-th row */ 2898 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2899 jmax = ui[prow + 1]; 2900 ncols = jmax - jmin; 2901 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2902 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2903 nzk += nlnk; 2904 2905 /* update il and jl for prow */ 2906 if (jmin < jmax) { 2907 il[prow] = jmin; 2908 j = *uj_ptr; 2909 jl[prow] = jl[j]; 2910 jl[j] = prow; 2911 } 2912 prow = nextprow; 2913 } 2914 2915 /* if free space is not available, make more free space */ 2916 if (current_space->local_remaining < nzk) { 2917 i = am - k + 1; /* num of unfactored rows */ 2918 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2919 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2920 reallocs++; 2921 } 2922 2923 /* copy data into free space, then initialize lnk */ 2924 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2925 2926 /* add the k-th row into il and jl */ 2927 if (nzk > 1) { 2928 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2929 jl[k] = jl[i]; 2930 jl[i] = k; 2931 il[k] = ui[k] + 1; 2932 } 2933 ui_ptr[k] = current_space->array; 2934 2935 current_space->array += nzk; 2936 current_space->local_used += nzk; 2937 current_space->local_remaining -= nzk; 2938 2939 ui[k + 1] = ui[k] + nzk; 2940 } 2941 2942 PetscCall(ISRestoreIndices(perm, &rip)); 2943 PetscCall(ISRestoreIndices(iperm, &riip)); 2944 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2945 2946 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2947 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2948 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2949 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2950 2951 /* put together the new matrix in MATSEQSBAIJ format */ 2952 2953 b = (Mat_SeqSBAIJ *)fact->data; 2954 b->singlemalloc = PETSC_FALSE; 2955 b->free_a = PETSC_TRUE; 2956 b->free_ij = PETSC_TRUE; 2957 2958 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 2959 2960 b->j = uj; 2961 b->i = ui; 2962 b->diag = udiag; 2963 b->free_diag = PETSC_TRUE; 2964 b->ilen = NULL; 2965 b->imax = NULL; 2966 b->row = perm; 2967 b->col = perm; 2968 2969 PetscCall(PetscObjectReference((PetscObject)perm)); 2970 PetscCall(PetscObjectReference((PetscObject)perm)); 2971 2972 b->icol = iperm; 2973 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2974 2975 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2976 2977 b->maxnz = b->nz = ui[am]; 2978 2979 fact->info.factor_mallocs = reallocs; 2980 fact->info.fill_ratio_given = fill; 2981 if (ai[am] != 0) { 2982 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2983 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2984 } else { 2985 fact->info.fill_ratio_needed = 0.0; 2986 } 2987 #if defined(PETSC_USE_INFO) 2988 if (ai[am] != 0) { 2989 PetscReal af = fact->info.fill_ratio_needed; 2990 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2991 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2992 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2993 } else { 2994 PetscCall(PetscInfo(A, "Empty matrix\n")); 2995 } 2996 #endif 2997 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2998 PetscFunctionReturn(PETSC_SUCCESS); 2999 } 3000 3001 #if 0 3002 // unused 3003 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 3004 { 3005 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3006 Mat_SeqSBAIJ *b; 3007 PetscBool perm_identity, missing; 3008 PetscReal fill = info->fill; 3009 const PetscInt *rip, *riip; 3010 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 3011 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 3012 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 3013 PetscFreeSpaceList free_space = NULL, current_space = NULL; 3014 PetscBT lnkbt; 3015 IS iperm; 3016 3017 PetscFunctionBegin; 3018 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 3019 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3020 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3021 3022 /* check whether perm is the identity mapping */ 3023 PetscCall(ISIdentity(perm, &perm_identity)); 3024 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 3025 PetscCall(ISGetIndices(iperm, &riip)); 3026 PetscCall(ISGetIndices(perm, &rip)); 3027 3028 /* initialization */ 3029 PetscCall(PetscMalloc1(am + 1, &ui)); 3030 ui[0] = 0; 3031 3032 /* jl: linked list for storing indices of the pivot rows 3033 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3034 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 3035 for (i = 0; i < am; i++) { 3036 jl[i] = am; 3037 il[i] = 0; 3038 } 3039 3040 /* create and initialize a linked list for storing column indices of the active row k */ 3041 nlnk = am + 1; 3042 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 3043 3044 /* initial FreeSpace size is fill*(ai[am]+1) */ 3045 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 3046 current_space = free_space; 3047 3048 for (k = 0; k < am; k++) { /* for each active row k */ 3049 /* initialize lnk by the column indices of row rip[k] of A */ 3050 nzk = 0; 3051 ncols = ai[rip[k] + 1] - ai[rip[k]]; 3052 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 3053 ncols_upper = 0; 3054 for (j = 0; j < ncols; j++) { 3055 i = riip[*(aj + ai[rip[k]] + j)]; 3056 if (i >= k) { /* only take upper triangular entry */ 3057 cols[ncols_upper] = i; 3058 ncols_upper++; 3059 } 3060 } 3061 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 3062 nzk += nlnk; 3063 3064 /* update lnk by computing fill-in for each pivot row to be merged in */ 3065 prow = jl[k]; /* 1st pivot row */ 3066 3067 while (prow < k) { 3068 nextprow = jl[prow]; 3069 /* merge prow into k-th row */ 3070 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3071 jmax = ui[prow + 1]; 3072 ncols = jmax - jmin; 3073 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3074 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 3075 nzk += nlnk; 3076 3077 /* update il and jl for prow */ 3078 if (jmin < jmax) { 3079 il[prow] = jmin; 3080 j = *uj_ptr; 3081 jl[prow] = jl[j]; 3082 jl[j] = prow; 3083 } 3084 prow = nextprow; 3085 } 3086 3087 /* if free space is not available, make more free space */ 3088 if (current_space->local_remaining < nzk) { 3089 i = am - k + 1; /* num of unfactored rows */ 3090 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3091 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3092 reallocs++; 3093 } 3094 3095 /* copy data into free space, then initialize lnk */ 3096 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3097 3098 /* add the k-th row into il and jl */ 3099 if (nzk - 1 > 0) { 3100 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3101 jl[k] = jl[i]; 3102 jl[i] = k; 3103 il[k] = ui[k] + 1; 3104 } 3105 ui_ptr[k] = current_space->array; 3106 3107 current_space->array += nzk; 3108 current_space->local_used += nzk; 3109 current_space->local_remaining -= nzk; 3110 3111 ui[k + 1] = ui[k] + nzk; 3112 } 3113 3114 #if defined(PETSC_USE_INFO) 3115 if (ai[am] != 0) { 3116 PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am]; 3117 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3118 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3119 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3120 } else { 3121 PetscCall(PetscInfo(A, "Empty matrix\n")); 3122 } 3123 #endif 3124 3125 PetscCall(ISRestoreIndices(perm, &rip)); 3126 PetscCall(ISRestoreIndices(iperm, &riip)); 3127 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3128 3129 /* destroy list of free space and other temporary array(s) */ 3130 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 3131 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3132 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3133 3134 /* put together the new matrix in MATSEQSBAIJ format */ 3135 3136 b = (Mat_SeqSBAIJ *)fact->data; 3137 b->singlemalloc = PETSC_FALSE; 3138 b->free_a = PETSC_TRUE; 3139 b->free_ij = PETSC_TRUE; 3140 3141 PetscCall(PetscMalloc1(ui[am] + 1, &b->a)); 3142 3143 b->j = uj; 3144 b->i = ui; 3145 b->diag = NULL; 3146 b->ilen = NULL; 3147 b->imax = NULL; 3148 b->row = perm; 3149 b->col = perm; 3150 3151 PetscCall(PetscObjectReference((PetscObject)perm)); 3152 PetscCall(PetscObjectReference((PetscObject)perm)); 3153 3154 b->icol = iperm; 3155 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3156 3157 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3158 b->maxnz = b->nz = ui[am]; 3159 3160 fact->info.factor_mallocs = reallocs; 3161 fact->info.fill_ratio_given = fill; 3162 if (ai[am] != 0) { 3163 fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am]; 3164 } else { 3165 fact->info.fill_ratio_needed = 0.0; 3166 } 3167 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3168 PetscFunctionReturn(PETSC_SUCCESS); 3169 } 3170 #endif 3171 3172 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3173 { 3174 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3175 PetscInt n = A->rmap->n; 3176 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3177 PetscScalar *x, sum; 3178 const PetscScalar *b; 3179 const MatScalar *aa = a->a, *v; 3180 PetscInt i, nz; 3181 3182 PetscFunctionBegin; 3183 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3184 3185 PetscCall(VecGetArrayRead(bb, &b)); 3186 PetscCall(VecGetArrayWrite(xx, &x)); 3187 3188 /* forward solve the lower triangular */ 3189 x[0] = b[0]; 3190 v = aa; 3191 vi = aj; 3192 for (i = 1; i < n; i++) { 3193 nz = ai[i + 1] - ai[i]; 3194 sum = b[i]; 3195 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3196 v += nz; 3197 vi += nz; 3198 x[i] = sum; 3199 } 3200 3201 /* backward solve the upper triangular */ 3202 for (i = n - 1; i >= 0; i--) { 3203 v = aa + adiag[i + 1] + 1; 3204 vi = aj + adiag[i + 1] + 1; 3205 nz = adiag[i] - adiag[i + 1] - 1; 3206 sum = x[i]; 3207 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3208 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3209 } 3210 3211 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3212 PetscCall(VecRestoreArrayRead(bb, &b)); 3213 PetscCall(VecRestoreArrayWrite(xx, &x)); 3214 PetscFunctionReturn(PETSC_SUCCESS); 3215 } 3216 3217 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3218 { 3219 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3220 IS iscol = a->col, isrow = a->row; 3221 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3222 const PetscInt *rout, *cout, *r, *c; 3223 PetscScalar *x, *tmp, sum; 3224 const PetscScalar *b; 3225 const MatScalar *aa = a->a, *v; 3226 3227 PetscFunctionBegin; 3228 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3229 3230 PetscCall(VecGetArrayRead(bb, &b)); 3231 PetscCall(VecGetArrayWrite(xx, &x)); 3232 tmp = a->solve_work; 3233 3234 PetscCall(ISGetIndices(isrow, &rout)); 3235 r = rout; 3236 PetscCall(ISGetIndices(iscol, &cout)); 3237 c = cout; 3238 3239 /* forward solve the lower triangular */ 3240 tmp[0] = b[r[0]]; 3241 v = aa; 3242 vi = aj; 3243 for (i = 1; i < n; i++) { 3244 nz = ai[i + 1] - ai[i]; 3245 sum = b[r[i]]; 3246 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3247 tmp[i] = sum; 3248 v += nz; 3249 vi += nz; 3250 } 3251 3252 /* backward solve the upper triangular */ 3253 for (i = n - 1; i >= 0; i--) { 3254 v = aa + adiag[i + 1] + 1; 3255 vi = aj + adiag[i + 1] + 1; 3256 nz = adiag[i] - adiag[i + 1] - 1; 3257 sum = tmp[i]; 3258 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3259 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3260 } 3261 3262 PetscCall(ISRestoreIndices(isrow, &rout)); 3263 PetscCall(ISRestoreIndices(iscol, &cout)); 3264 PetscCall(VecRestoreArrayRead(bb, &b)); 3265 PetscCall(VecRestoreArrayWrite(xx, &x)); 3266 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3267 PetscFunctionReturn(PETSC_SUCCESS); 3268 } 3269 3270 #if 0 3271 // unused 3272 /* 3273 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3274 */ 3275 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3276 { 3277 Mat B = *fact; 3278 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3279 IS isicol; 3280 const PetscInt *r, *ic; 3281 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3282 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3283 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3284 PetscInt nlnk, *lnk; 3285 PetscBT lnkbt; 3286 PetscBool row_identity, icol_identity; 3287 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3288 const PetscInt *ics; 3289 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3290 PetscReal dt = info->dt, shift = info->shiftamount; 3291 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3292 PetscBool missing; 3293 3294 PetscFunctionBegin; 3295 if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3296 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3297 3298 /* symbolic factorization, can be reused */ 3299 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3300 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3301 adiag = a->diag; 3302 3303 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3304 3305 /* bdiag is location of diagonal in factor */ 3306 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3307 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3308 3309 /* allocate row pointers bi */ 3310 PetscCall(PetscMalloc1(2 * n + 2, &bi)); 3311 3312 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3313 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3314 nnz_max = ai[n] + 2 * n * dtcount + 2; 3315 3316 PetscCall(PetscMalloc1(nnz_max + 1, &bj)); 3317 PetscCall(PetscMalloc1(nnz_max + 1, &ba)); 3318 3319 /* put together the new matrix */ 3320 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3321 b = (Mat_SeqAIJ *)B->data; 3322 3323 b->free_a = PETSC_TRUE; 3324 b->free_ij = PETSC_TRUE; 3325 b->singlemalloc = PETSC_FALSE; 3326 3327 b->a = ba; 3328 b->j = bj; 3329 b->i = bi; 3330 b->diag = bdiag; 3331 b->ilen = NULL; 3332 b->imax = NULL; 3333 b->row = isrow; 3334 b->col = iscol; 3335 PetscCall(PetscObjectReference((PetscObject)isrow)); 3336 PetscCall(PetscObjectReference((PetscObject)iscol)); 3337 b->icol = isicol; 3338 3339 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3340 b->maxnz = nnz_max; 3341 3342 B->factortype = MAT_FACTOR_ILUDT; 3343 B->info.factor_mallocs = 0; 3344 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3345 /* end of symbolic factorization */ 3346 3347 PetscCall(ISGetIndices(isrow, &r)); 3348 PetscCall(ISGetIndices(isicol, &ic)); 3349 ics = ic; 3350 3351 /* linked list for storing column indices of the active row */ 3352 nlnk = n + 1; 3353 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3354 3355 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3356 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3357 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3358 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3359 PetscCall(PetscArrayzero(rtmp, n)); 3360 3361 bi[0] = 0; 3362 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3363 bdiag_rev[n] = bdiag[0]; 3364 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3365 for (i = 0; i < n; i++) { 3366 /* copy initial fill into linked list */ 3367 nzi = ai[r[i] + 1] - ai[r[i]]; 3368 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3369 nzi_al = adiag[r[i]] - ai[r[i]]; 3370 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3371 ajtmp = aj + ai[r[i]]; 3372 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3373 3374 /* load in initial (unfactored row) */ 3375 aatmp = a->a + ai[r[i]]; 3376 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3377 3378 /* add pivot rows into linked list */ 3379 row = lnk[n]; 3380 while (row < i) { 3381 nzi_bl = bi[row + 1] - bi[row] + 1; 3382 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3383 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3384 nzi += nlnk; 3385 row = lnk[row]; 3386 } 3387 3388 /* copy data from lnk into jtmp, then initialize lnk */ 3389 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3390 3391 /* numerical factorization */ 3392 bjtmp = jtmp; 3393 row = *bjtmp++; /* 1st pivot row */ 3394 while (row < i) { 3395 pc = rtmp + row; 3396 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3397 multiplier = (*pc) * (*pv); 3398 *pc = multiplier; 3399 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3400 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3401 pv = ba + bdiag[row + 1] + 1; 3402 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3403 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3404 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3405 } 3406 row = *bjtmp++; 3407 } 3408 3409 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3410 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3411 nzi_bl = 0; 3412 j = 0; 3413 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3414 vtmp[j] = rtmp[jtmp[j]]; 3415 rtmp[jtmp[j]] = 0.0; 3416 nzi_bl++; 3417 j++; 3418 } 3419 nzi_bu = nzi - nzi_bl - 1; 3420 while (j < nzi) { 3421 vtmp[j] = rtmp[jtmp[j]]; 3422 rtmp[jtmp[j]] = 0.0; 3423 j++; 3424 } 3425 3426 bjtmp = bj + bi[i]; 3427 batmp = ba + bi[i]; 3428 /* apply level dropping rule to L part */ 3429 ncut = nzi_al + dtcount; 3430 if (ncut < nzi_bl) { 3431 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3432 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3433 } else { 3434 ncut = nzi_bl; 3435 } 3436 for (j = 0; j < ncut; j++) { 3437 bjtmp[j] = jtmp[j]; 3438 batmp[j] = vtmp[j]; 3439 } 3440 bi[i + 1] = bi[i] + ncut; 3441 nzi = ncut + 1; 3442 3443 /* apply level dropping rule to U part */ 3444 ncut = nzi_au + dtcount; 3445 if (ncut < nzi_bu) { 3446 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3447 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3448 } else { 3449 ncut = nzi_bu; 3450 } 3451 nzi += ncut; 3452 3453 /* mark bdiagonal */ 3454 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3455 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3456 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3457 bjtmp = bj + bdiag[i]; 3458 batmp = ba + bdiag[i]; 3459 *bjtmp = i; 3460 *batmp = diag_tmp; /* rtmp[i]; */ 3461 if (*batmp == 0.0) *batmp = dt + shift; 3462 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3463 3464 bjtmp = bj + bdiag[i + 1] + 1; 3465 batmp = ba + bdiag[i + 1] + 1; 3466 for (k = 0; k < ncut; k++) { 3467 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3468 batmp[k] = vtmp[nzi_bl + 1 + k]; 3469 } 3470 3471 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3472 } /* for (i=0; i<n; i++) */ 3473 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3474 3475 PetscCall(ISRestoreIndices(isrow, &r)); 3476 PetscCall(ISRestoreIndices(isicol, &ic)); 3477 3478 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3479 PetscCall(PetscFree2(im, jtmp)); 3480 PetscCall(PetscFree2(rtmp, vtmp)); 3481 PetscCall(PetscFree(bdiag_rev)); 3482 3483 PetscCall(PetscLogFlops(B->cmap->n)); 3484 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3485 3486 PetscCall(ISIdentity(isrow, &row_identity)); 3487 PetscCall(ISIdentity(isicol, &icol_identity)); 3488 if (row_identity && icol_identity) { 3489 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3490 } else { 3491 B->ops->solve = MatSolve_SeqAIJ; 3492 } 3493 3494 B->ops->solveadd = NULL; 3495 B->ops->solvetranspose = NULL; 3496 B->ops->solvetransposeadd = NULL; 3497 B->ops->matsolve = NULL; 3498 B->assembled = PETSC_TRUE; 3499 B->preallocated = PETSC_TRUE; 3500 PetscFunctionReturn(PETSC_SUCCESS); 3501 } 3502 3503 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3504 /* 3505 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3506 */ 3507 3508 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3509 { 3510 PetscFunctionBegin; 3511 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3512 PetscFunctionReturn(PETSC_SUCCESS); 3513 } 3514 3515 /* 3516 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3517 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3518 */ 3519 /* 3520 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3521 */ 3522 3523 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3524 { 3525 Mat C = fact; 3526 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3527 IS isrow = b->row, isicol = b->icol; 3528 const PetscInt *r, *ic, *ics; 3529 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3530 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3531 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3532 PetscReal dt = info->dt, shift = info->shiftamount; 3533 PetscBool row_identity, col_identity; 3534 3535 PetscFunctionBegin; 3536 PetscCall(ISGetIndices(isrow, &r)); 3537 PetscCall(ISGetIndices(isicol, &ic)); 3538 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3539 ics = ic; 3540 3541 for (i = 0; i < n; i++) { 3542 /* initialize rtmp array */ 3543 nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */ 3544 bjtmp = bj + bi[i]; 3545 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3546 rtmp[i] = 0.0; 3547 nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */ 3548 bjtmp = bj + bdiag[i + 1] + 1; 3549 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3550 3551 /* load in initial unfactored row of A */ 3552 nz = ai[r[i] + 1] - ai[r[i]]; 3553 ajtmp = aj + ai[r[i]]; 3554 v = aa + ai[r[i]]; 3555 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3556 3557 /* numerical factorization */ 3558 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3559 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3560 k = 0; 3561 while (k < nzl) { 3562 row = *bjtmp++; 3563 pc = rtmp + row; 3564 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3565 multiplier = (*pc) * (*pv); 3566 *pc = multiplier; 3567 if (PetscAbsScalar(multiplier) > dt) { 3568 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3569 pv = b->a + bdiag[row + 1] + 1; 3570 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3571 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3572 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3573 } 3574 k++; 3575 } 3576 3577 /* finished row so stick it into b->a */ 3578 /* L-part */ 3579 pv = b->a + bi[i]; 3580 pj = bj + bi[i]; 3581 nzl = bi[i + 1] - bi[i]; 3582 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3583 3584 /* diagonal: invert diagonal entries for simpler triangular solves */ 3585 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3586 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3587 3588 /* U-part */ 3589 pv = b->a + bdiag[i + 1] + 1; 3590 pj = bj + bdiag[i + 1] + 1; 3591 nzu = bdiag[i] - bdiag[i + 1] - 1; 3592 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3593 } 3594 3595 PetscCall(PetscFree(rtmp)); 3596 PetscCall(ISRestoreIndices(isicol, &ic)); 3597 PetscCall(ISRestoreIndices(isrow, &r)); 3598 3599 PetscCall(ISIdentity(isrow, &row_identity)); 3600 PetscCall(ISIdentity(isicol, &col_identity)); 3601 if (row_identity && col_identity) { 3602 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3603 } else { 3604 C->ops->solve = MatSolve_SeqAIJ; 3605 } 3606 C->ops->solveadd = NULL; 3607 C->ops->solvetranspose = NULL; 3608 C->ops->solvetransposeadd = NULL; 3609 C->ops->matsolve = NULL; 3610 C->assembled = PETSC_TRUE; 3611 C->preallocated = PETSC_TRUE; 3612 3613 PetscCall(PetscLogFlops(C->cmap->n)); 3614 PetscFunctionReturn(PETSC_SUCCESS); 3615 } 3616 #endif 3617