1 /* 2 Factorization code for BAIJ format. 3 */ 4 5 #include <../src/mat/impls/baij/seq/baij.h> 6 #include <petsc/private/kernels/blockinvert.h> 7 #include <petscbt.h> 8 #include <../src/mat/utils/freespace.h> 9 10 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat, Mat, MatDuplicateOption, PetscBool); 11 12 /* 13 This is not much faster than MatLUFactorNumeric_SeqBAIJ_N() but the solve is faster at least sometimes 14 */ 15 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 16 { 17 Mat C = B; 18 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 19 PetscInt i, j, k, ipvt[15]; 20 const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *ajtmp, *bjtmp, *bdiag = b->diag, *pj; 21 PetscInt nz, nzL, row; 22 MatScalar *rtmp, *pc, *mwork, *pv, *vv, work[225]; 23 const MatScalar *v, *aa = a->a; 24 PetscInt bs2 = a->bs2, bs = A->rmap->bs, flg; 25 PetscInt sol_ver; 26 PetscBool allowzeropivot, zeropivotdetected; 27 28 PetscFunctionBegin; 29 allowzeropivot = PetscNot(A->erroriffailure); 30 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-sol_ver", &sol_ver, NULL)); 31 32 /* generate work space needed by the factorization */ 33 PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork)); 34 PetscCall(PetscArrayzero(rtmp, bs2 * n)); 35 36 for (i = 0; i < n; i++) { 37 /* zero rtmp */ 38 /* L part */ 39 nz = bi[i + 1] - bi[i]; 40 bjtmp = bj + bi[i]; 41 for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 42 43 /* U part */ 44 nz = bdiag[i] - bdiag[i + 1]; 45 bjtmp = bj + bdiag[i + 1] + 1; 46 for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 47 48 /* load in initial (unfactored row) */ 49 nz = ai[i + 1] - ai[i]; 50 ajtmp = aj + ai[i]; 51 v = aa + bs2 * ai[i]; 52 for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2)); 53 54 /* elimination */ 55 bjtmp = bj + bi[i]; 56 nzL = bi[i + 1] - bi[i]; 57 for (k = 0; k < nzL; k++) { 58 row = bjtmp[k]; 59 pc = rtmp + bs2 * row; 60 for (flg = 0, j = 0; j < bs2; j++) { 61 if (pc[j] != 0.0) { 62 flg = 1; 63 break; 64 } 65 } 66 if (flg) { 67 pv = b->a + bs2 * bdiag[row]; 68 PetscKernel_A_gets_A_times_B(bs, pc, pv, mwork); 69 /* PetscCall(PetscKernel_A_gets_A_times_B_15(pc,pv,mwork)); */ 70 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 71 pv = b->a + bs2 * (bdiag[row + 1] + 1); 72 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ 73 for (j = 0; j < nz; j++) { 74 vv = rtmp + bs2 * pj[j]; 75 PetscKernel_A_gets_A_minus_B_times_C(bs, vv, pc, pv); 76 /* PetscCall(PetscKernel_A_gets_A_minus_B_times_C_15(vv,pc,pv)); */ 77 pv += bs2; 78 } 79 PetscCall(PetscLogFlops(2.0 * bs2 * bs * (nz + 1) - bs2)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 80 } 81 } 82 83 /* finished row so stick it into b->a */ 84 /* L part */ 85 pv = b->a + bs2 * bi[i]; 86 pj = b->j + bi[i]; 87 nz = bi[i + 1] - bi[i]; 88 for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 89 90 /* Mark diagonal and invert diagonal for simpler triangular solves */ 91 pv = b->a + bs2 * bdiag[i]; 92 pj = b->j + bdiag[i]; 93 PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 94 PetscCall(PetscKernel_A_gets_inverse_A_15(pv, ipvt, work, info->shiftamount, allowzeropivot, &zeropivotdetected)); 95 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 96 97 /* U part */ 98 pv = b->a + bs2 * (bdiag[i + 1] + 1); 99 pj = b->j + bdiag[i + 1] + 1; 100 nz = bdiag[i] - bdiag[i + 1] - 1; 101 for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 102 } 103 104 PetscCall(PetscFree2(rtmp, mwork)); 105 106 C->ops->solve = MatSolve_SeqBAIJ_15_NaturalOrdering_ver1; 107 C->ops->solvetranspose = MatSolve_SeqBAIJ_N_NaturalOrdering; 108 C->assembled = PETSC_TRUE; 109 110 PetscCall(PetscLogFlops(1.333333333333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 111 PetscFunctionReturn(PETSC_SUCCESS); 112 } 113 114 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat B, Mat A, const MatFactorInfo *info) 115 { 116 Mat C = B; 117 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data; 118 IS isrow = b->row, isicol = b->icol; 119 const PetscInt *r, *ic; 120 PetscInt i, j, k, n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 121 PetscInt *ajtmp, *bjtmp, nz, nzL, row, *bdiag = b->diag, *pj; 122 MatScalar *rtmp, *pc, *mwork, *v, *pv, *aa = a->a; 123 PetscInt bs = A->rmap->bs, bs2 = a->bs2, *v_pivots, flg; 124 MatScalar *v_work; 125 PetscBool col_identity, row_identity, both_identity; 126 PetscBool allowzeropivot, zeropivotdetected; 127 128 PetscFunctionBegin; 129 PetscCall(ISGetIndices(isrow, &r)); 130 PetscCall(ISGetIndices(isicol, &ic)); 131 allowzeropivot = PetscNot(A->erroriffailure); 132 133 PetscCall(PetscCalloc1(bs2 * n, &rtmp)); 134 135 /* generate work space needed by dense LU factorization */ 136 PetscCall(PetscMalloc3(bs, &v_work, bs2, &mwork, bs, &v_pivots)); 137 138 for (i = 0; i < n; i++) { 139 /* zero rtmp */ 140 /* L part */ 141 nz = bi[i + 1] - bi[i]; 142 bjtmp = bj + bi[i]; 143 for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 144 145 /* U part */ 146 nz = bdiag[i] - bdiag[i + 1]; 147 bjtmp = bj + bdiag[i + 1] + 1; 148 for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2)); 149 150 /* load in initial (unfactored row) */ 151 nz = ai[r[i] + 1] - ai[r[i]]; 152 ajtmp = aj + ai[r[i]]; 153 v = aa + bs2 * ai[r[i]]; 154 for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], v + bs2 * j, bs2)); 155 156 /* elimination */ 157 bjtmp = bj + bi[i]; 158 nzL = bi[i + 1] - bi[i]; 159 for (k = 0; k < nzL; k++) { 160 row = bjtmp[k]; 161 pc = rtmp + bs2 * row; 162 for (flg = 0, j = 0; j < bs2; j++) { 163 if (pc[j] != 0.0) { 164 flg = 1; 165 break; 166 } 167 } 168 if (flg) { 169 pv = b->a + bs2 * bdiag[row]; 170 PetscKernel_A_gets_A_times_B(bs, pc, pv, mwork); /* *pc = *pc * (*pv); */ 171 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 172 pv = b->a + bs2 * (bdiag[row + 1] + 1); 173 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */ 174 for (j = 0; j < nz; j++) PetscKernel_A_gets_A_minus_B_times_C(bs, rtmp + bs2 * pj[j], pc, pv + bs2 * j); 175 PetscCall(PetscLogFlops(2.0 * bs2 * bs * (nz + 1) - bs2)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 176 } 177 } 178 179 /* finished row so stick it into b->a */ 180 /* L part */ 181 pv = b->a + bs2 * bi[i]; 182 pj = b->j + bi[i]; 183 nz = bi[i + 1] - bi[i]; 184 for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 185 186 /* Mark diagonal and invert diagonal for simpler triangular solves */ 187 pv = b->a + bs2 * bdiag[i]; 188 pj = b->j + bdiag[i]; 189 PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2)); 190 191 PetscCall(PetscKernel_A_gets_inverse_A(bs, pv, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 192 if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 193 194 /* U part */ 195 pv = b->a + bs2 * (bdiag[i + 1] + 1); 196 pj = b->j + bdiag[i + 1] + 1; 197 nz = bdiag[i] - bdiag[i + 1] - 1; 198 for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2)); 199 } 200 201 PetscCall(PetscFree(rtmp)); 202 PetscCall(PetscFree3(v_work, mwork, v_pivots)); 203 PetscCall(ISRestoreIndices(isicol, &ic)); 204 PetscCall(ISRestoreIndices(isrow, &r)); 205 206 PetscCall(ISIdentity(isrow, &row_identity)); 207 PetscCall(ISIdentity(isicol, &col_identity)); 208 209 both_identity = (PetscBool)(row_identity && col_identity); 210 if (both_identity) { 211 switch (bs) { 212 case 9: 213 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 214 C->ops->solve = MatSolve_SeqBAIJ_9_NaturalOrdering; 215 #else 216 C->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; 217 #endif 218 break; 219 case 11: 220 C->ops->solve = MatSolve_SeqBAIJ_11_NaturalOrdering; 221 break; 222 case 12: 223 C->ops->solve = MatSolve_SeqBAIJ_12_NaturalOrdering; 224 break; 225 case 13: 226 C->ops->solve = MatSolve_SeqBAIJ_13_NaturalOrdering; 227 break; 228 case 14: 229 C->ops->solve = MatSolve_SeqBAIJ_14_NaturalOrdering; 230 break; 231 default: 232 C->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; 233 break; 234 } 235 } else { 236 C->ops->solve = MatSolve_SeqBAIJ_N; 237 } 238 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; 239 240 C->assembled = PETSC_TRUE; 241 242 PetscCall(PetscLogFlops(1.333333333333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 /* 247 ilu(0) with natural ordering under new data structure. 248 See MatILUFactorSymbolic_SeqAIJ_ilu0() for detailed description 249 because this code is almost identical to MatILUFactorSymbolic_SeqAIJ_ilu0_inplace(). 250 */ 251 252 static PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 253 { 254 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 255 PetscInt n = a->mbs, *ai = a->i, *aj, *adiag = a->diag, bs2 = a->bs2; 256 PetscInt i, j, nz, *bi, *bj, *bdiag, bi_temp; 257 258 PetscFunctionBegin; 259 PetscCall(MatDuplicateNoCreate_SeqBAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 260 b = (Mat_SeqBAIJ *)fact->data; 261 262 /* allocate matrix arrays for new data structure */ 263 PetscCall(PetscShmgetAllocateArray(bs2 * ai[n] + 1, sizeof(PetscScalar), (void **)&b->a)); 264 PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j)); 265 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 266 b->free_a = PETSC_TRUE; 267 b->free_ij = PETSC_TRUE; 268 fact->preallocated = PETSC_TRUE; 269 fact->assembled = PETSC_TRUE; 270 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 271 bdiag = b->diag; 272 273 if (n > 0) PetscCall(PetscArrayzero(b->a, bs2 * ai[n])); 274 275 /* set bi and bj with new data structure */ 276 bi = b->i; 277 bj = b->j; 278 279 /* L part */ 280 bi[0] = 0; 281 for (i = 0; i < n; i++) { 282 nz = adiag[i] - ai[i]; 283 bi[i + 1] = bi[i] + nz; 284 aj = a->j + ai[i]; 285 for (j = 0; j < nz; j++) { 286 *bj = aj[j]; 287 bj++; 288 } 289 } 290 291 /* U part */ 292 bi_temp = bi[n]; 293 bdiag[n] = bi[n] - 1; 294 for (i = n - 1; i >= 0; i--) { 295 nz = ai[i + 1] - adiag[i] - 1; 296 bi_temp = bi_temp + nz + 1; 297 aj = a->j + adiag[i] + 1; 298 for (j = 0; j < nz; j++) { 299 *bj = aj[j]; 300 bj++; 301 } 302 /* diag[i] */ 303 *bj = i; 304 bj++; 305 bdiag[i] = bi_temp - 1; 306 } 307 PetscFunctionReturn(PETSC_SUCCESS); 308 } 309 310 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 311 { 312 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 313 IS isicol; 314 const PetscInt *r, *ic; 315 PetscInt n = a->mbs, *ai = a->i, *aj = a->j, d; 316 PetscInt *bi, *cols, nnz, *cols_lvl; 317 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 318 PetscInt i, levels, diagonal_fill; 319 PetscBool col_identity, row_identity, both_identity; 320 PetscReal f; 321 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 322 PetscBT lnkbt; 323 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 324 PetscFreeSpaceList free_space = NULL, current_space = NULL; 325 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 326 PetscBool missing; 327 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 328 329 PetscFunctionBegin; 330 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 331 if (bs > 1) { /* check shifttype */ 332 PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 333 } 334 335 PetscCall(MatMissingDiagonal(A, &missing, &d)); 336 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 337 338 f = info->fill; 339 levels = (PetscInt)info->levels; 340 diagonal_fill = (PetscInt)info->diagonal_fill; 341 342 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 343 344 PetscCall(ISIdentity(isrow, &row_identity)); 345 PetscCall(ISIdentity(iscol, &col_identity)); 346 347 both_identity = (PetscBool)(row_identity && col_identity); 348 349 if (!levels && both_identity) { 350 /* special case: ilu(0) with natural ordering */ 351 PetscCall(MatILUFactorSymbolic_SeqBAIJ_ilu0(fact, A, isrow, iscol, info)); 352 PetscCall(MatSeqBAIJSetNumericFactorization(fact, both_identity)); 353 354 fact->factortype = MAT_FACTOR_ILU; 355 fact->info.factor_mallocs = 0; 356 fact->info.fill_ratio_given = info->fill; 357 fact->info.fill_ratio_needed = 1.0; 358 359 b = (Mat_SeqBAIJ *)fact->data; 360 b->row = isrow; 361 b->col = iscol; 362 b->icol = isicol; 363 PetscCall(PetscObjectReference((PetscObject)isrow)); 364 PetscCall(PetscObjectReference((PetscObject)iscol)); 365 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 366 367 PetscCall(PetscMalloc1((n + 1) * bs, &b->solve_work)); 368 PetscFunctionReturn(PETSC_SUCCESS); 369 } 370 371 PetscCall(ISGetIndices(isrow, &r)); 372 PetscCall(ISGetIndices(isicol, &ic)); 373 374 /* get new row pointers */ 375 PetscCall(PetscMalloc1(n + 1, &bi)); 376 bi[0] = 0; 377 /* bdiag is location of diagonal in factor */ 378 PetscCall(PetscMalloc1(n + 1, &bdiag)); 379 bdiag[0] = 0; 380 381 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 382 383 /* create a linked list for storing column indices of the active row */ 384 nlnk = n + 1; 385 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 386 387 /* initial FreeSpace size is f*(ai[n]+1) */ 388 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 389 current_space = free_space; 390 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 391 current_space_lvl = free_space_lvl; 392 393 for (i = 0; i < n; i++) { 394 nzi = 0; 395 /* copy current row into linked list */ 396 nnz = ai[r[i] + 1] - ai[r[i]]; 397 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 398 cols = aj + ai[r[i]]; 399 lnk[i] = -1; /* marker to indicate if diagonal exists */ 400 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 401 nzi += nlnk; 402 403 /* make sure diagonal entry is included */ 404 if (diagonal_fill && lnk[i] == -1) { 405 fm = n; 406 while (lnk[fm] < i) fm = lnk[fm]; 407 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 408 lnk[fm] = i; 409 lnk_lvl[i] = 0; 410 nzi++; 411 dcount++; 412 } 413 414 /* add pivot rows into the active row */ 415 nzbd = 0; 416 prow = lnk[n]; 417 while (prow < i) { 418 nnz = bdiag[prow]; 419 cols = bj_ptr[prow] + nnz + 1; 420 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 421 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 422 423 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 424 nzi += nlnk; 425 prow = lnk[prow]; 426 nzbd++; 427 } 428 bdiag[i] = nzbd; 429 bi[i + 1] = bi[i] + nzi; 430 431 /* if free space is not available, make more free space */ 432 if (current_space->local_remaining < nzi) { 433 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 434 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 435 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 436 reallocs++; 437 } 438 439 /* copy data into free_space and free_space_lvl, then initialize lnk */ 440 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 441 442 bj_ptr[i] = current_space->array; 443 bjlvl_ptr[i] = current_space_lvl->array; 444 445 /* make sure the active row i has diagonal entry */ 446 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 447 448 current_space->array += nzi; 449 current_space->local_used += nzi; 450 current_space->local_remaining -= nzi; 451 452 current_space_lvl->array += nzi; 453 current_space_lvl->local_used += nzi; 454 current_space_lvl->local_remaining -= nzi; 455 } 456 457 PetscCall(ISRestoreIndices(isrow, &r)); 458 PetscCall(ISRestoreIndices(isicol, &ic)); 459 460 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 461 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 462 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 463 464 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 465 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 466 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 467 468 #if defined(PETSC_USE_INFO) 469 { 470 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 471 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 472 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 473 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 474 PetscCall(PetscInfo(A, "for best performance.\n")); 475 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 476 } 477 #endif 478 479 /* put together the new matrix */ 480 PetscCall(MatSeqBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 481 482 b = (Mat_SeqBAIJ *)fact->data; 483 b->free_ij = PETSC_TRUE; 484 PetscCall(PetscShmgetAllocateArray(bs2 * (bdiag[0] + 1), sizeof(PetscScalar), (void **)&b->a)); 485 b->free_a = PETSC_TRUE; 486 487 b->j = bj; 488 b->i = bi; 489 b->diag = bdiag; 490 b->free_diag = PETSC_TRUE; 491 b->ilen = NULL; 492 b->imax = NULL; 493 b->row = isrow; 494 b->col = iscol; 495 PetscCall(PetscObjectReference((PetscObject)isrow)); 496 PetscCall(PetscObjectReference((PetscObject)iscol)); 497 b->icol = isicol; 498 499 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 500 /* In b structure: Free imax, ilen, old a, old j. 501 Allocate bdiag, solve_work, new a, new j */ 502 b->maxnz = b->nz = bdiag[0] + 1; 503 504 fact->info.factor_mallocs = reallocs; 505 fact->info.fill_ratio_given = f; 506 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 507 508 PetscCall(MatSeqBAIJSetNumericFactorization(fact, both_identity)); 509 PetscFunctionReturn(PETSC_SUCCESS); 510 } 511